Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
zdwls
committed on
Commit
·
b12b1dc
1
Parent(s):
a812c3b
init branch
Browse files- app.py +16 -2
- config.yaml +1 -0
app.py
CHANGED
|
@@ -116,11 +116,20 @@ for model in pbar:
|
|
| 116 |
ds = ds.map(add_task)
|
| 117 |
base_dict = {"Model": make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, f"https://huggingface.co/spaces/{REPO_ID}"))}
|
| 118 |
# For now only one metric per task - Could add more metrics lateron
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
for task, metric in TASK_TO_METRIC.items():
|
| 120 |
-
ds_dict = ds.filter(lambda x: (x["mteb_task"] == task) and (x["metric"] == metric))["test"].to_dict()
|
| 121 |
ds_dict = {k: round(v, 2) for k, v in zip(ds_dict["mteb_dataset_name_with_lang"], ds_dict["score"])}
|
| 122 |
EXTERNAL_MODEL_RESULTS[model][task][metric].append({**base_dict, **ds_dict})
|
| 123 |
|
|
|
|
| 124 |
# Save & cache EXTERNAL_MODEL_RESULTS
|
| 125 |
with open("EXTERNAL_MODEL_RESULTS.json", "w") as f:
|
| 126 |
json.dump(EXTERNAL_MODEL_RESULTS, f)
|
|
@@ -457,6 +466,7 @@ for board, board_config in BOARDS_CONFIG.items():
|
|
| 457 |
"data": boards_data[board]["data_tasks"][task_category],
|
| 458 |
"refresh": get_refresh_function(task_category, task_category_list),
|
| 459 |
"credits": credits,
|
|
|
|
| 460 |
})
|
| 461 |
|
| 462 |
dataframes = []
|
|
@@ -612,11 +622,15 @@ with gr.Blocks(css=css) as block:
|
|
| 612 |
# For updating the 'language' in the URL
|
| 613 |
item_tab.select(update_url_language, [current_task_language, language_per_task], [current_task_language, language_per_task], trigger_mode="always_last").then(None, [current_task_language], [], js=set_window_url_params)
|
| 614 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 615 |
with gr.Row():
|
| 616 |
gr.Markdown(f"""
|
| 617 |
{item['description']}
|
| 618 |
|
| 619 |
-
- **Metric:** {metric}
|
| 620 |
- **Languages:** {item['language_long'] if 'language_long' in item else item['language']}
|
| 621 |
{"- **Credits:** " + item['credits'] if ("credits" in item and item["credits"] is not None) else ''}
|
| 622 |
""")
|
|
|
|
| 116 |
ds = ds.map(add_task)
|
| 117 |
base_dict = {"Model": make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, f"https://huggingface.co/spaces/{REPO_ID}"))}
|
| 118 |
# For now only one metric per task - Could add more metrics lateron
|
| 119 |
+
|
| 120 |
+
def filter_function(x, task, metric):
|
| 121 |
+
# This is a hack for the passkey and needle retrieval test, which reports ndcg_at_1 (i.e. accuracy), rather than the ndcg_at_10 that is commonly used for retrieval tasks.
|
| 122 |
+
if x['mteb_dataset_name'] in ['LEMBNeedleRetrieval', 'LEMBPasskeyRetrieval']:
|
| 123 |
+
return x["mteb_task"] == task and x['metric'] == 'ndcg_at_1'
|
| 124 |
+
else:
|
| 125 |
+
return x["mteb_task"] == task and x["metric"] == metric
|
| 126 |
+
|
| 127 |
for task, metric in TASK_TO_METRIC.items():
|
| 128 |
+
ds_dict = ds.filter(lambda x: filter_function(x, task, metric))["test"].to_dict()
|
| 129 |
ds_dict = {k: round(v, 2) for k, v in zip(ds_dict["mteb_dataset_name_with_lang"], ds_dict["score"])}
|
| 130 |
EXTERNAL_MODEL_RESULTS[model][task][metric].append({**base_dict, **ds_dict})
|
| 131 |
|
| 132 |
+
print("********************hello********************")
|
| 133 |
# Save & cache EXTERNAL_MODEL_RESULTS
|
| 134 |
with open("EXTERNAL_MODEL_RESULTS.json", "w") as f:
|
| 135 |
json.dump(EXTERNAL_MODEL_RESULTS, f)
|
|
|
|
| 466 |
"data": boards_data[board]["data_tasks"][task_category],
|
| 467 |
"refresh": get_refresh_function(task_category, task_category_list),
|
| 468 |
"credits": credits,
|
| 469 |
+
"metric": board_config.get("metric", None),
|
| 470 |
})
|
| 471 |
|
| 472 |
dataframes = []
|
|
|
|
| 622 |
# For updating the 'language' in the URL
|
| 623 |
item_tab.select(update_url_language, [current_task_language, language_per_task], [current_task_language, language_per_task], trigger_mode="always_last").then(None, [current_task_language], [], js=set_window_url_params)
|
| 624 |
|
| 625 |
+
specific_metric = metric
|
| 626 |
+
if item.get("metric", None) is not None:
|
| 627 |
+
specific_metric = item['metric']
|
| 628 |
+
|
| 629 |
with gr.Row():
|
| 630 |
gr.Markdown(f"""
|
| 631 |
{item['description']}
|
| 632 |
|
| 633 |
+
- **Metric:** {specific_metric}
|
| 634 |
- **Languages:** {item['language_long'] if 'language_long' in item else item['language']}
|
| 635 |
{"- **Credits:** " + item['credits'] if ("credits" in item and item["credits"] is not None) else ''}
|
| 636 |
""")
|
config.yaml
CHANGED
|
@@ -301,6 +301,7 @@ boards:
|
|
| 301 |
icon: "📚"
|
| 302 |
special_icons: null
|
| 303 |
credits: "[LongEmbed](https://arxiv.org/abs/2404.12096v2)"
|
|
|
|
| 304 |
tasks:
|
| 305 |
Retrieval:
|
| 306 |
- LEMBNarrativeQARetrieval
|
|
|
|
| 301 |
icon: "📚"
|
| 302 |
special_icons: null
|
| 303 |
credits: "[LongEmbed](https://arxiv.org/abs/2404.12096v2)"
|
| 304 |
+
metric: nDCG@10 (for NarrativeQA, QMSum, SummScreenFD, WikimQA) & nDCG@1 (for passkey and needle)
|
| 305 |
tasks:
|
| 306 |
Retrieval:
|
| 307 |
- LEMBNarrativeQARetrieval
|