Spaces:
Paused
Paused
use BackgroundScheduler to restart space
Browse files
app.py
CHANGED
@@ -16,6 +16,11 @@ H4_TOKEN = os.environ.get("H4_TOKEN", None)
|
|
16 |
LMEH_REPO = "HuggingFaceH4/lmeh_evaluations"
|
17 |
IS_PUBLIC = bool(os.environ.get("IS_PUBLIC", None))
|
18 |
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
def get_all_requested_models(requested_models_dir):
|
21 |
depth = 1
|
@@ -142,18 +147,18 @@ def get_leaderboard():
|
|
142 |
all_data.append(gpt35_values)
|
143 |
|
144 |
base_line = {
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
all_data.append(base_line)
|
156 |
-
|
157 |
df = pd.DataFrame.from_records(all_data)
|
158 |
df = df.sort_values(by=["Average ⬆️"], ascending=False)
|
159 |
df = df[COLS]
|
@@ -287,7 +292,7 @@ def add_new_eval(
|
|
287 |
f.write(json.dumps(eval_entry))
|
288 |
LMEH_REPO = "HuggingFaceH4/lmeh_evaluations"
|
289 |
|
290 |
-
api = HfApi()
|
291 |
api.upload_file(
|
292 |
path_or_fileobj=out_path,
|
293 |
path_in_repo=out_path,
|
@@ -306,6 +311,7 @@ def refresh():
|
|
306 |
get_leaderboard(), get_eval_table()
|
307 |
return leaderboard, finished_eval_queue, running_eval_queue, pending_eval_queue
|
308 |
|
|
|
309 |
custom_css = """
|
310 |
#changelog-text {
|
311 |
font-size: 18px !important;
|
@@ -331,8 +337,8 @@ We chose these benchmarks as they test a variety of reasoning and general knowle
|
|
331 |
)
|
332 |
|
333 |
with gr.Accordion("CHANGELOG", open=False):
|
334 |
-
changelog = gr.Markdown(CHANGELOG_TEXT,elem_id="changelog-text")
|
335 |
-
|
336 |
with gr.Row():
|
337 |
leaderboard_table = gr.components.Dataframe(
|
338 |
value=leaderboard, headers=COLS, datatype=TYPES, max_rows=5
|
@@ -415,4 +421,19 @@ We chose these benchmarks as they test a variety of reasoning and general knowle
|
|
415 |
],
|
416 |
submission_result,
|
417 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
418 |
demo.launch()
|
|
|
16 |
LMEH_REPO = "HuggingFaceH4/lmeh_evaluations"
|
17 |
IS_PUBLIC = bool(os.environ.get("IS_PUBLIC", None))
|
18 |
|
19 |
+
api = HfApi(token=H4_TOKEN)
|
20 |
+
|
21 |
+
def restart_space():
|
22 |
+
api.restart_space(repo_id="HuggingFaceH4/open_llm_leaderboard")
|
23 |
+
|
24 |
|
25 |
def get_all_requested_models(requested_models_dir):
|
26 |
depth = 1
|
|
|
147 |
all_data.append(gpt35_values)
|
148 |
|
149 |
base_line = {
|
150 |
+
"Model": "<p>Baseline</p>",
|
151 |
+
"Revision": "N/A",
|
152 |
+
"8bit": None,
|
153 |
+
"Average ⬆️": 25.0,
|
154 |
+
"ARC (25-shot) ⬆️": 25.0,
|
155 |
+
"HellaSwag (10-shot) ⬆️": 25.0,
|
156 |
+
"MMLU (5-shot) ⬆️": 25.0,
|
157 |
+
"TruthfulQA (0-shot) ⬆️": 25.0,
|
158 |
+
}
|
159 |
+
|
160 |
all_data.append(base_line)
|
161 |
+
|
162 |
df = pd.DataFrame.from_records(all_data)
|
163 |
df = df.sort_values(by=["Average ⬆️"], ascending=False)
|
164 |
df = df[COLS]
|
|
|
292 |
f.write(json.dumps(eval_entry))
|
293 |
LMEH_REPO = "HuggingFaceH4/lmeh_evaluations"
|
294 |
|
295 |
+
# api = HfApi()
|
296 |
api.upload_file(
|
297 |
path_or_fileobj=out_path,
|
298 |
path_in_repo=out_path,
|
|
|
311 |
get_leaderboard(), get_eval_table()
|
312 |
return leaderboard, finished_eval_queue, running_eval_queue, pending_eval_queue
|
313 |
|
314 |
+
|
315 |
custom_css = """
|
316 |
#changelog-text {
|
317 |
font-size: 18px !important;
|
|
|
337 |
)
|
338 |
|
339 |
with gr.Accordion("CHANGELOG", open=False):
|
340 |
+
changelog = gr.Markdown(CHANGELOG_TEXT, elem_id="changelog-text")
|
341 |
+
|
342 |
with gr.Row():
|
343 |
leaderboard_table = gr.components.Dataframe(
|
344 |
value=leaderboard, headers=COLS, datatype=TYPES, max_rows=5
|
|
|
421 |
],
|
422 |
submission_result,
|
423 |
)
|
424 |
+
|
425 |
+
# demo.load(
|
426 |
+
# refresh,
|
427 |
+
# inputs=[],
|
428 |
+
# outputs=[
|
429 |
+
# leaderboard_table,
|
430 |
+
# finished_eval_table,
|
431 |
+
# running_eval_table,
|
432 |
+
# pending_eval_table,
|
433 |
+
# ],
|
434 |
+
# )
|
435 |
+
|
436 |
+
scheduler = BackgroundScheduler()
|
437 |
+
scheduler.add_job(restart_space, 'interval', seconds=3600)
|
438 |
+
scheduler.start()
|
439 |
demo.launch()
|