idolezal committed on
Commit
23ee797
1 Parent(s): 2a4d35b

External check_significance() at czechllm.fit.vutbr.cz

Browse files
Files changed (1) hide show
  1. server.py +52 -4
server.py CHANGED
@@ -4,6 +4,7 @@ import json
4
  import os
5
  import hashlib
6
  import time
 
7
  from collections import namedtuple
8
  from xml.sax.saxutils import escape as xmlEscape, quoteattr as xmlQuoteAttr
9
 
@@ -11,7 +12,7 @@ import gradio as gr
11
  import pandas as pd
12
  from huggingface_hub import HfApi, snapshot_download
13
 
14
- from compare_significance import check_significance, SUPPORTED_METRICS
15
 
16
  VISIBLE_METRICS = SUPPORTED_METRICS + ["macro_f1"]
17
 
@@ -42,6 +43,49 @@ MARKDOWN_SPECIAL_CHARACTERS = {
42
  "|": "|"
43
  }
44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  class LeaderboardServer:
46
  def __init__(self):
47
  self.server_address = REPO
@@ -213,10 +257,14 @@ class LeaderboardServer:
213
  new_tournament[new_submission_id][new_submission_id] = {
214
  task: False for task in self.tasks_metadata.keys()
215
  }
216
-
217
  for competitor_id in self.submission_ids:
218
- res = check_significance(new_model_file, self.submission_id_to_file[competitor_id])
219
- res_inverse = check_significance(self.submission_id_to_file[competitor_id], new_model_file)
 
 
 
 
220
  new_tournament[new_submission_id][competitor_id] = {
221
  task: data["significant"] for task, data in res.items()
222
  }
 
4
  import os
5
  import hashlib
6
  import time
7
+ import requests
8
  from collections import namedtuple
9
  from xml.sax.saxutils import escape as xmlEscape, quoteattr as xmlQuoteAttr
10
 
 
12
  import pandas as pd
13
  from huggingface_hub import HfApi, snapshot_download
14
 
15
+ from compare_significance import SUPPORTED_METRICS
16
 
17
  VISIBLE_METRICS = SUPPORTED_METRICS + ["macro_f1"]
18
 
 
43
  "|": "|"
44
  }
45
 
46
def check_significance_send_task(model_a_path, model_b_path, timeout=(10, 120)):
    """Submit a significance-comparison task to the external service.

    Both files are uploaded in one multipart POST under the form fields
    ``model_a`` and ``model_b``.

    Args:
        model_a_path: Path of the first file to upload.
        model_b_path: Path of the second file to upload.
        timeout: ``(connect, read)`` timeout in seconds handed to
            ``requests.post``. Without it an unresponsive server would
            block the caller indefinitely.

    Returns:
        The URL to poll for the result of the submitted task.

    Raises:
        RuntimeError: If the server answers 429 (too busy) or any status
            other than 202.
        requests.exceptions.RequestException: On connection problems or
            when ``timeout`` expires.
    """
    url = 'https://czechllm.fit.vutbr.cz/benczechmark-leaderboard/compare_significance/'

    # Prepare and send the request; both files are closed as soon as the
    # upload has finished.
    with (
        open(model_a_path, 'rb') as model_a_fp,
        open(model_b_path, 'rb') as model_b_fp,
    ):
        files = {
            'model_a': model_a_fp,
            'model_b': model_b_fp,
        }
        response = requests.post(url, files=files, timeout=timeout)

    # 202 means the task was accepted. The final response URL is used as
    # the polling address (presumably the server redirects the POST to
    # the result resource -- TODO confirm against the service).
    if response.status_code == 202:
        return response.url

    if response.status_code == 429:
        # TODO: wrap in try/except and re-raise as gr.Error
        raise RuntimeError('Server is too busy. Please try again later.')

    # TODO: wrap in try/except and re-raise as gr.Error
    raise RuntimeError(f'Failed to submit task. Status code: {response.status_code}')
70
+
71
def check_significance_wait_for_result(result_url, poll_interval=5, timeout=(10, 60), max_wait=None):
    """Poll ``result_url`` until the significance result is available.

    Args:
        result_url: URL returned when the task was submitted.
        poll_interval: Seconds to sleep between polls while the server
            keeps answering 202.
        timeout: ``(connect, read)`` timeout in seconds for every single
            ``requests.get`` call, so one stalled connection cannot hang
            the loop forever.
        max_wait: Optional upper bound, in seconds, on the total polling
            time. ``None`` (the default) polls without a limit.

    Returns:
        The value stored under the ``'result'`` key of the JSON body of
        the final 200 response.

    Raises:
        RuntimeError: If the server answers with a status other than 200
            or 202, or if ``max_wait`` elapses before a result arrives.
        requests.exceptions.RequestException: On connection problems or
            when ``timeout`` expires.
    """
    # A monotonic clock keeps the deadline immune to wall-clock changes.
    deadline = None if max_wait is None else time.monotonic() + max_wait

    while True:
        response = requests.get(result_url, timeout=timeout)

        if response.status_code == 200:
            return response.json()['result']

        if response.status_code != 202:
            # TODO: wrap in try/except and re-raise as gr.Error
            raise RuntimeError(f'Failed to get result. Status code: {response.status_code}')

        # 202: the task is still in progress.
        if deadline is not None and time.monotonic() >= deadline:
            raise RuntimeError(f'Timed out waiting for result after {max_wait} seconds.')

        time.sleep(poll_interval)
83
+
84
def check_significance(model_a_path, model_b_path):
    """Run one remote significance comparison and block until it finishes.

    Submits the two files with ``check_significance_send_task`` and then
    waits on the returned URL with ``check_significance_wait_for_result``.

    Args:
        model_a_path: Path of the first file to upload.
        model_b_path: Path of the second file to upload.

    Returns:
        Whatever ``check_significance_wait_for_result`` returns for the
        submitted task.
    """
    return check_significance_wait_for_result(
        check_significance_send_task(model_a_path, model_b_path)
    )
88
+
89
  class LeaderboardServer:
90
  def __init__(self):
91
  self.server_address = REPO
 
257
  new_tournament[new_submission_id][new_submission_id] = {
258
  task: False for task in self.tasks_metadata.keys()
259
  }
260
+
261
  for competitor_id in self.submission_ids:
262
+ res = check_significance_send_task(new_model_file, self.submission_id_to_file[competitor_id])
263
+ res_inverse = check_significance_send_task(self.submission_id_to_file[competitor_id], new_model_file)
264
+
265
+ res = check_significance_wait_for_result(res)
266
+ res_inverse = check_significance_wait_for_result(res_inverse)
267
+
268
  new_tournament[new_submission_id][competitor_id] = {
269
  task: data["significant"] for task, data in res.items()
270
  }