Spaces: Running on CPU Upgrade

Tom Aarsen committed • ab565ba
1 Parent(s): e2b41c8

Add search bar/filtering; always show Model Size
app.py
CHANGED
@@ -1,6 +1,7 @@
-from functools import partial
+from functools import partial, reduce
 import json
 import os
+import re
 
 from datasets import load_dataset
 import gradio as gr
@@ -1098,7 +1099,7 @@ def add_rank(df):
     if len(cols_to_rank) == 1:
         df.sort_values(cols_to_rank[0], ascending=False, inplace=True)
     else:
-        df.insert(…
+        df.insert(len(df.columns) - len(cols_to_rank), "Average", df[cols_to_rank].mean(axis=1, skipna=False))
         df.sort_values("Average", ascending=False, inplace=True)
     df.insert(0, "Rank", list(range(1, len(df) + 1)))
     df = df.round(2)
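Below is a minimal standalone sketch (toy data, not part of this commit) of what the new `df.insert` call does: with `Model` and the now always-present `Model Size (Million Parameters)` sitting ahead of the task scores, `len(df.columns) - len(cols_to_rank)` places the computed `Average` immediately before the per-task columns.

```python
import pandas as pd

# Toy frame mirroring the leaderboard layout: metadata first, then task scores.
df = pd.DataFrame({
    "Model": ["model-a", "model-b"],
    "Model Size (Million Parameters)": [110, 340],
    "TaskA": [70.0, 60.0],
    "TaskB": [80.0, 50.0],
})
cols_to_rank = ["TaskA", "TaskB"]

# Insert "Average" right before the score columns (position 4 - 2 = 2).
df.insert(len(df.columns) - len(cols_to_rank), "Average",
          df[cols_to_rank].mean(axis=1, skipna=False))
print(df.columns.tolist())
# ['Model', 'Model Size (Million Parameters)', 'Average', 'TaskA', 'TaskB']
```

`skipna=False` keeps the average as NaN when any task score is missing, so partially evaluated models do not outrank fully evaluated ones.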
@@ -1106,7 +1107,7 @@ def add_rank(df):
     df.fillna("", inplace=True)
     return df
 
-def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_emb_dim=…
+def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_emb_dim=True, task_to_metric=TASK_TO_METRIC, rank=True):
     api = HfApi()
     models = api.list_models(filter="mteb")
     # Initialize list to models that we cannot fetch metadata from
@@ -1169,6 +1170,8 @@ def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_…
         except:
             pass
         df_list.append(out)
+        if len(df_list) >= 1:
+            break
     df = pd.DataFrame(df_list)
     # If there are any models that are the same, merge them
     # E.g. if out["Model"] has the same value in two places, merge & take whichever one is not NaN else just take the first one
@@ -1217,26 +1220,26 @@ def get_mteb_average():
 
     DATA_OVERALL = DATA_OVERALL.round(2)
 
-    DATA_CLASSIFICATION_EN = add_rank(DATA_OVERALL[["Model"] + TASK_LIST_CLASSIFICATION])
+    DATA_CLASSIFICATION_EN = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)"] + TASK_LIST_CLASSIFICATION])
     # Only keep rows with at least one score in addition to the "Model" & rank column
-    DATA_CLASSIFICATION_EN = DATA_CLASSIFICATION_EN[DATA_CLASSIFICATION_EN.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_CLASSIFICATION_EN = DATA_CLASSIFICATION_EN[DATA_CLASSIFICATION_EN.iloc[:, 3:].ne("").any(axis=1)]
 
-    DATA_CLUSTERING = add_rank(DATA_OVERALL[["Model"] + TASK_LIST_CLUSTERING])
-    DATA_CLUSTERING = DATA_CLUSTERING[DATA_CLUSTERING.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_CLUSTERING = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)"] + TASK_LIST_CLUSTERING])
+    DATA_CLUSTERING = DATA_CLUSTERING[DATA_CLUSTERING.iloc[:, 3:].ne("").any(axis=1)]
 
-    DATA_PAIR_CLASSIFICATION = add_rank(DATA_OVERALL[["Model"] + TASK_LIST_PAIR_CLASSIFICATION])
-    DATA_PAIR_CLASSIFICATION = DATA_PAIR_CLASSIFICATION[DATA_PAIR_CLASSIFICATION.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_PAIR_CLASSIFICATION = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)"] + TASK_LIST_PAIR_CLASSIFICATION])
+    DATA_PAIR_CLASSIFICATION = DATA_PAIR_CLASSIFICATION[DATA_PAIR_CLASSIFICATION.iloc[:, 3:].ne("").any(axis=1)]
 
-    DATA_RERANKING = add_rank(DATA_OVERALL[["Model"] + TASK_LIST_RERANKING])
-    DATA_RERANKING = DATA_RERANKING[DATA_RERANKING.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_RERANKING = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)"] + TASK_LIST_RERANKING])
+    DATA_RERANKING = DATA_RERANKING[DATA_RERANKING.iloc[:, 3:].ne("").any(axis=1)]
 
-    DATA_RETRIEVAL = add_rank(DATA_OVERALL[["Model"] + TASK_LIST_RETRIEVAL])
-    DATA_RETRIEVAL = DATA_RETRIEVAL[DATA_RETRIEVAL.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_RETRIEVAL = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)"] + TASK_LIST_RETRIEVAL])
+    DATA_RETRIEVAL = DATA_RETRIEVAL[DATA_RETRIEVAL.iloc[:, 3:].ne("").any(axis=1)]
 
-    DATA_STS_EN = add_rank(DATA_OVERALL[["Model"] + TASK_LIST_STS])
-    DATA_STS_EN = DATA_STS_EN[DATA_STS_EN.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_STS_EN = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)"] + TASK_LIST_STS])
+    DATA_STS_EN = DATA_STS_EN[DATA_STS_EN.iloc[:, 3:].ne("").any(axis=1)]
 
-    DATA_SUMMARIZATION = add_rank(DATA_OVERALL[["Model"] + TASK_LIST_SUMMARIZATION])
+    DATA_SUMMARIZATION = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)"] + TASK_LIST_SUMMARIZATION])
     DATA_SUMMARIZATION = DATA_SUMMARIZATION[DATA_SUMMARIZATION.iloc[:, 1:].ne("").any(axis=1)]
 
     # Fill NaN after averaging
@@ -1279,24 +1282,24 @@ def get_mteb_average_zh():
 
     DATA_OVERALL_ZH = DATA_OVERALL_ZH.round(2)
 
-    DATA_CLASSIFICATION_ZH = add_rank(DATA_OVERALL_ZH[["Model"] + TASK_LIST_CLASSIFICATION_ZH])
+    DATA_CLASSIFICATION_ZH = add_rank(DATA_OVERALL_ZH[["Model", "Model Size (Million Parameters)"] + TASK_LIST_CLASSIFICATION_ZH])
     # Only keep rows with at least one score in addition to the "Model" & rank column
-    DATA_CLASSIFICATION_ZH = DATA_CLASSIFICATION_ZH[DATA_CLASSIFICATION_ZH.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_CLASSIFICATION_ZH = DATA_CLASSIFICATION_ZH[DATA_CLASSIFICATION_ZH.iloc[:, 3:].ne("").any(axis=1)]
 
-    DATA_CLUSTERING_ZH = add_rank(DATA_OVERALL_ZH[["Model"] + TASK_LIST_CLUSTERING_ZH])
-    DATA_CLUSTERING_ZH = DATA_CLUSTERING_ZH[DATA_CLUSTERING_ZH.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_CLUSTERING_ZH = add_rank(DATA_OVERALL_ZH[["Model", "Model Size (Million Parameters)"] + TASK_LIST_CLUSTERING_ZH])
+    DATA_CLUSTERING_ZH = DATA_CLUSTERING_ZH[DATA_CLUSTERING_ZH.iloc[:, 3:].ne("").any(axis=1)]
 
-    DATA_PAIR_CLASSIFICATION_ZH = add_rank(DATA_OVERALL_ZH[["Model"] + TASK_LIST_PAIR_CLASSIFICATION_ZH])
-    DATA_PAIR_CLASSIFICATION_ZH = DATA_PAIR_CLASSIFICATION_ZH[DATA_PAIR_CLASSIFICATION_ZH.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_PAIR_CLASSIFICATION_ZH = add_rank(DATA_OVERALL_ZH[["Model", "Model Size (Million Parameters)"] + TASK_LIST_PAIR_CLASSIFICATION_ZH])
+    DATA_PAIR_CLASSIFICATION_ZH = DATA_PAIR_CLASSIFICATION_ZH[DATA_PAIR_CLASSIFICATION_ZH.iloc[:, 3:].ne("").any(axis=1)]
 
-    DATA_RERANKING_ZH = add_rank(DATA_OVERALL_ZH[["Model"] + TASK_LIST_RERANKING_ZH])
-    DATA_RERANKING_ZH = DATA_RERANKING_ZH[DATA_RERANKING_ZH.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_RERANKING_ZH = add_rank(DATA_OVERALL_ZH[["Model", "Model Size (Million Parameters)"] + TASK_LIST_RERANKING_ZH])
+    DATA_RERANKING_ZH = DATA_RERANKING_ZH[DATA_RERANKING_ZH.iloc[:, 3:].ne("").any(axis=1)]
 
-    DATA_RETRIEVAL_ZH = add_rank(DATA_OVERALL_ZH[["Model"] + TASK_LIST_RETRIEVAL_ZH])
-    DATA_RETRIEVAL_ZH = DATA_RETRIEVAL_ZH[DATA_RETRIEVAL_ZH.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_RETRIEVAL_ZH = add_rank(DATA_OVERALL_ZH[["Model", "Model Size (Million Parameters)"] + TASK_LIST_RETRIEVAL_ZH])
+    DATA_RETRIEVAL_ZH = DATA_RETRIEVAL_ZH[DATA_RETRIEVAL_ZH.iloc[:, 3:].ne("").any(axis=1)]
 
-    DATA_STS_ZH = add_rank(DATA_OVERALL_ZH[["Model"] + TASK_LIST_STS_ZH])
-    DATA_STS_ZH = DATA_STS_ZH[DATA_STS_ZH.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_STS_ZH = add_rank(DATA_OVERALL_ZH[["Model", "Model Size (Million Parameters)"] + TASK_LIST_STS_ZH])
+    DATA_STS_ZH = DATA_STS_ZH[DATA_STS_ZH.iloc[:, 3:].ne("").any(axis=1)]
 
     # Fill NaN after averaging
     DATA_OVERALL_ZH.fillna("", inplace=True)
@@ -1339,25 +1342,25 @@ def get_mteb_average_fr():
     DATA_OVERALL_FR.insert(0, "Rank", list(range(1, len(DATA_OVERALL_FR) + 1)))
     DATA_OVERALL_FR = DATA_OVERALL_FR.round(2)
 
-    DATA_CLASSIFICATION_FR = add_rank(DATA_OVERALL_FR[["Model"] + TASK_LIST_CLASSIFICATION_FR])
-    DATA_CLASSIFICATION_FR = DATA_CLASSIFICATION_FR[DATA_CLASSIFICATION_FR.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_CLASSIFICATION_FR = add_rank(DATA_OVERALL_FR[["Model", "Model Size (Million Parameters)"] + TASK_LIST_CLASSIFICATION_FR])
+    DATA_CLASSIFICATION_FR = DATA_CLASSIFICATION_FR[DATA_CLASSIFICATION_FR.iloc[:, 3:].ne("").any(axis=1)]
 
-    DATA_CLUSTERING_FR = add_rank(DATA_OVERALL_FR[["Model"] + TASK_LIST_CLUSTERING_FR])
-    DATA_CLUSTERING_FR = DATA_CLUSTERING_FR[DATA_CLUSTERING_FR.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_CLUSTERING_FR = add_rank(DATA_OVERALL_FR[["Model", "Model Size (Million Parameters)"] + TASK_LIST_CLUSTERING_FR])
+    DATA_CLUSTERING_FR = DATA_CLUSTERING_FR[DATA_CLUSTERING_FR.iloc[:, 3:].ne("").any(axis=1)]
 
-    DATA_PAIR_CLASSIFICATION_FR = add_rank(DATA_OVERALL_FR[["Model"] + TASK_LIST_PAIR_CLASSIFICATION_FR])
-    DATA_PAIR_CLASSIFICATION_FR = DATA_PAIR_CLASSIFICATION_FR[DATA_PAIR_CLASSIFICATION_FR.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_PAIR_CLASSIFICATION_FR = add_rank(DATA_OVERALL_FR[["Model", "Model Size (Million Parameters)"] + TASK_LIST_PAIR_CLASSIFICATION_FR])
+    DATA_PAIR_CLASSIFICATION_FR = DATA_PAIR_CLASSIFICATION_FR[DATA_PAIR_CLASSIFICATION_FR.iloc[:, 3:].ne("").any(axis=1)]
 
-    DATA_RERANKING_FR = add_rank(DATA_OVERALL_FR[["Model"] + TASK_LIST_RERANKING_FR])
-    DATA_RERANKING_FR = DATA_RERANKING_FR[DATA_RERANKING_FR.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_RERANKING_FR = add_rank(DATA_OVERALL_FR[["Model", "Model Size (Million Parameters)"] + TASK_LIST_RERANKING_FR])
+    DATA_RERANKING_FR = DATA_RERANKING_FR[DATA_RERANKING_FR.iloc[:, 3:].ne("").any(axis=1)]
 
-    DATA_RETRIEVAL_FR = add_rank(DATA_OVERALL_FR[["Model"] + TASK_LIST_RETRIEVAL_FR])
-    DATA_RETRIEVAL_FR = DATA_RETRIEVAL_FR[DATA_RETRIEVAL_FR.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_RETRIEVAL_FR = add_rank(DATA_OVERALL_FR[["Model", "Model Size (Million Parameters)"] + TASK_LIST_RETRIEVAL_FR])
+    DATA_RETRIEVAL_FR = DATA_RETRIEVAL_FR[DATA_RETRIEVAL_FR.iloc[:, 3:].ne("").any(axis=1)]
 
-    DATA_STS_FR = add_rank(DATA_OVERALL_FR[["Model"] + TASK_LIST_STS_FR])
-    DATA_STS_FR = DATA_STS_FR[DATA_STS_FR.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_STS_FR = add_rank(DATA_OVERALL_FR[["Model", "Model Size (Million Parameters)"] + TASK_LIST_STS_FR])
+    DATA_STS_FR = DATA_STS_FR[DATA_STS_FR.iloc[:, 3:].ne("").any(axis=1)]
 
-    DATA_SUMMARIZATION_FR = add_rank(DATA_OVERALL_FR[["Model"] + TASK_LIST_SUMMARIZATION_FR])
+    DATA_SUMMARIZATION_FR = add_rank(DATA_OVERALL_FR[["Model", "Model Size (Million Parameters)"] + TASK_LIST_SUMMARIZATION_FR])
     DATA_SUMMARIZATION_FR = DATA_SUMMARIZATION_FR[DATA_SUMMARIZATION_FR.iloc[:, 1:].ne("").any(axis=1)]
 
     # Fill NaN after averaging
@@ -1398,21 +1401,21 @@ def get_mteb_average_pl():
 
     DATA_OVERALL_PL = DATA_OVERALL_PL.round(2)
 
-    DATA_CLASSIFICATION_PL = add_rank(DATA_OVERALL_PL[["Model"] + TASK_LIST_CLASSIFICATION_PL])
+    DATA_CLASSIFICATION_PL = add_rank(DATA_OVERALL_PL[["Model", "Model Size (Million Parameters)"] + TASK_LIST_CLASSIFICATION_PL])
     # Only keep rows with at least one score in addition to the "Model" & rank column
-    DATA_CLASSIFICATION_PL = DATA_CLASSIFICATION_PL[DATA_CLASSIFICATION_PL.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_CLASSIFICATION_PL = DATA_CLASSIFICATION_PL[DATA_CLASSIFICATION_PL.iloc[:, 3:].ne("").any(axis=1)]
 
-    DATA_CLUSTERING_PL = add_rank(DATA_OVERALL_PL[["Model"] + TASK_LIST_CLUSTERING_PL])
-    DATA_CLUSTERING_PL = DATA_CLUSTERING_PL[DATA_CLUSTERING_PL.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_CLUSTERING_PL = add_rank(DATA_OVERALL_PL[["Model", "Model Size (Million Parameters)"] + TASK_LIST_CLUSTERING_PL])
+    DATA_CLUSTERING_PL = DATA_CLUSTERING_PL[DATA_CLUSTERING_PL.iloc[:, 3:].ne("").any(axis=1)]
 
-    DATA_PAIR_CLASSIFICATION_PL = add_rank(DATA_OVERALL_PL[["Model"] + TASK_LIST_PAIR_CLASSIFICATION_PL])
-    DATA_PAIR_CLASSIFICATION_PL = DATA_PAIR_CLASSIFICATION_PL[DATA_PAIR_CLASSIFICATION_PL.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_PAIR_CLASSIFICATION_PL = add_rank(DATA_OVERALL_PL[["Model", "Model Size (Million Parameters)"] + TASK_LIST_PAIR_CLASSIFICATION_PL])
+    DATA_PAIR_CLASSIFICATION_PL = DATA_PAIR_CLASSIFICATION_PL[DATA_PAIR_CLASSIFICATION_PL.iloc[:, 3:].ne("").any(axis=1)]
 
-    DATA_RETRIEVAL_PL = add_rank(DATA_OVERALL_PL[["Model"] + TASK_LIST_RETRIEVAL_PL])
-    DATA_RETRIEVAL_PL = DATA_RETRIEVAL_PL[DATA_RETRIEVAL_PL.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_RETRIEVAL_PL = add_rank(DATA_OVERALL_PL[["Model", "Model Size (Million Parameters)"] + TASK_LIST_RETRIEVAL_PL])
+    DATA_RETRIEVAL_PL = DATA_RETRIEVAL_PL[DATA_RETRIEVAL_PL.iloc[:, 3:].ne("").any(axis=1)]
 
-    DATA_STS_PL = add_rank(DATA_OVERALL_PL[["Model"] + TASK_LIST_STS_PL])
-    DATA_STS_PL = DATA_STS_PL[DATA_STS_PL.iloc[:, 2:].ne("").any(axis=1)]
+    DATA_STS_PL = add_rank(DATA_OVERALL_PL[["Model", "Model Size (Million Parameters)"] + TASK_LIST_STS_PL])
+    DATA_STS_PL = DATA_STS_PL[DATA_STS_PL.iloc[:, 3:].ne("").any(axis=1)]
 
     # Fill NaN after averaging
     DATA_OVERALL_PL.fillna("", inplace=True)
@@ -1426,14 +1429,14 @@ get_mteb_average()
 get_mteb_average_fr()
 get_mteb_average_pl()
 get_mteb_average_zh()
-DATA_BITEXT_MINING = get_mteb_data(["BitextMining"], [], TASK_LIST_BITEXT_MINING)
-DATA_BITEXT_MINING_DA = get_mteb_data(["BitextMining"], [], TASK_LIST_BITEXT_MINING_DA)
-DATA_CLASSIFICATION_DA = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_DA)
-DATA_CLASSIFICATION_NB = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_NB)
-DATA_CLASSIFICATION_SV = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_SV)
-DATA_CLASSIFICATION_OTHER = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_OTHER)
-DATA_CLUSTERING_DE = get_mteb_data(["Clustering"], [], TASK_LIST_CLUSTERING_DE)
-DATA_STS_OTHER = get_mteb_data(["STS"], [], TASK_LIST_STS_OTHER)
+DATA_BITEXT_MINING = get_mteb_data(["BitextMining"], [], TASK_LIST_BITEXT_MINING)[["Rank", "Model", "Model Size (Million Parameters)", "Average"] + TASK_LIST_BITEXT_MINING]
+DATA_BITEXT_MINING_DA = get_mteb_data(["BitextMining"], [], TASK_LIST_BITEXT_MINING_DA)[["Rank", "Model", "Model Size (Million Parameters)"] + TASK_LIST_BITEXT_MINING_DA]
+DATA_CLASSIFICATION_DA = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_DA)[["Rank", "Model", "Model Size (Million Parameters)", "Average"] + TASK_LIST_CLASSIFICATION_DA]
+DATA_CLASSIFICATION_NB = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_NB)[["Rank", "Model", "Model Size (Million Parameters)", "Average"] + TASK_LIST_CLASSIFICATION_NB]
+DATA_CLASSIFICATION_SV = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_SV)[["Rank", "Model", "Model Size (Million Parameters)", "Average"] + TASK_LIST_CLASSIFICATION_SV]
+DATA_CLASSIFICATION_OTHER = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_OTHER)[["Rank", "Model", "Model Size (Million Parameters)", "Average"] + TASK_LIST_CLASSIFICATION_OTHER]
+DATA_CLUSTERING_DE = get_mteb_data(["Clustering"], [], TASK_LIST_CLUSTERING_DE)[["Rank", "Model", "Model Size (Million Parameters)", "Average"] + TASK_LIST_CLUSTERING_DE]
+DATA_STS_OTHER = get_mteb_data(["STS"], [], TASK_LIST_STS_OTHER)[["Rank", "Model", "Model Size (Million Parameters)", "Average"] + TASK_LIST_STS_OTHER]
 
 # Exact, add all non-nan integer values for every dataset
 NUM_SCORES = 0
@@ -1476,7 +1479,7 @@ for d in [
     DATA_SUMMARIZATION_FR,
 ]:
     # NUM_SCORES += d.iloc[:, 1:].apply(lambda x: sum([1 for y in x if isinstance(y, float) and not np.isnan(y)]), axis=1).sum()
-    cols_to_ignore = 3 if "Average" in d.columns else 2
+    cols_to_ignore = 4 if "Average" in d.columns else 3
    # Count number of scores including only non-nan floats & excluding the rank column
     NUM_SCORES += d.iloc[:, cols_to_ignore:].notna().sum().sum()
     # Exclude rank & model name column (first two); Do not count different language versions as different datasets
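A quick toy check (assumed data, not from the commit) of the new offsets: `Rank`, `Model`, and now `Model Size (Million Parameters)` precede the scores, plus `Average` when present, so three or four leading columns must be skipped when counting scores.

```python
import pandas as pd

d = pd.DataFrame({
    "Rank": [1], "Model": ["m"], "Model Size (Million Parameters)": [110],
    "Average": [75.0], "TaskA": [70.0], "TaskB": [None],
})
cols_to_ignore = 4 if "Average" in d.columns else 3
# Only the two task columns are inspected; the missing score is not counted.
print(d.iloc[:, cols_to_ignore:].notna().sum().sum())  # 1
```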
@@ -1491,6 +1494,7 @@ NUM_MODELS = len(set(MODELS))
 # 1. Force headers to wrap
 # 2. Force model column (maximum) width
 # 3. Prevent model column from overflowing, scroll instead
+# 4. Prevent checkbox groups from taking up too much space
 css = """
 table > thead {
     white-space: normal
@@ -1503,6 +1507,10 @@ table {
 table > tbody > tr > td:nth-child(2) > div {
     overflow-x: auto
 }
+
+.filter-checkbox-group {
+    max-width: max-content;
+}
 """
 
 """
@@ -1822,6 +1830,7 @@ data = {
 }
 
 dataframes = []
+full_dataframes = []
 tabs = []
 
 # The following JavaScript function updates the URL parameters based on the selected task and language
@@ -1854,6 +1863,57 @@ def update_url_language(event: gr.SelectData, current_task_language: dict, langu…
     language_per_task[current_task_language["task"]] = event.target.id
     return current_task_language, language_per_task
 
+NUMERIC_INTERVALS = {
+    "<100M": pd.Interval(0, 100, closed="right"),
+    ">100M, <500M": pd.Interval(100, 500, closed="right"),
+    ">500M, <1B": pd.Interval(500, 1000, closed="right"),
+    ">1B": pd.Interval(1000, 1_000_000, closed="right"),
+}
+
+MODEL_TYPES = [
+    "Open",
+    "API",
+]
+
+def filter_data(search_query, model_types, model_sizes, *full_dataframes):
+    output_dataframes = []
+    for df in full_dataframes:
+        # df = pd.DataFrame(data=dataframe.value["data"], columns=dataframe.value["headers"])
+
+        # Apply the search query
+        if search_query:
+            names = df["Model"].map(lambda x: re.match("<a .+?>(.+)</a>", x).group(1))
+            masks = []
+            for query in search_query.split(";"):
+                masks.append(names.str.contains(query))
+            df = df[reduce(lambda a, b: a | b, masks)]
+
+        # Apply the model type filtering
+        if model_types != MODEL_TYPES:
+            masks = []
+            for model_type in model_types:
+                if model_type == "Open":
+                    masks.append(df["Model Size (Million Parameters)"] != "")
+                elif model_type == "API":
+                    masks.append(df["Model Size (Million Parameters)"] == "")
+            df = df[reduce(lambda a, b: a | b, masks)]
+
+        # Apply the model size filtering
+        if model_sizes != ["?", *NUMERIC_INTERVALS.keys()]:
+            masks = []
+            # Handle the ? only
+            if "?" in model_sizes:
+                masks.append(df["Model Size (Million Parameters)"] == "")
+                model_sizes.remove("?")
+            # Handle the numeric intervals only
+            numeric_interval = pd.IntervalIndex(sorted([NUMERIC_INTERVALS[model_size] for model_size in model_sizes]))
+            sizes = df["Model Size (Million Parameters)"].replace('', 0)
+            masks.append(sizes.apply(lambda size: any(numeric_interval.contains(size))))
+            df = df[reduce(lambda a, b: a | b, masks)]
+
+        output_dataframes.append(df)
+    return output_dataframes
+
 with gr.Blocks(css=css) as block:
 
     # Store the current task and language for updating the URL. This is a bit hacky, but it works
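The two less obvious steps in `filter_data` above, as a self-contained sketch (toy rows; the names mirror the commit, the data is assumed): model cells hold HTML links, so the plain name is recovered with a regex before substring matching, and size filtering tests each value against the selected `pd.Interval` buckets.

```python
import re
from functools import reduce
import pandas as pd

NUMERIC_INTERVALS = {
    "<100M": pd.Interval(0, 100, closed="right"),
    ">100M, <500M": pd.Interval(100, 500, closed="right"),
}
df = pd.DataFrame({
    "Model": [
        '<a href="https://hf.co/org/small-model" target="_blank">small-model</a>',
        '<a href="https://hf.co/org/huge-model" target="_blank">huge-model</a>',
    ],
    "Model Size (Million Parameters)": [33, 7000],  # "" would mark an API model
})

# 1) Strip the link markup so queries match the bare model name.
names = df["Model"].map(lambda x: re.match("<a .+?>(.+)</a>", x).group(1))
masks = [names.str.contains(query) for query in "small;huge".split(";")]
print(df[reduce(lambda a, b: a | b, masks)].shape[0])  # 2 (both names match)

# 2) Keep rows whose size falls inside any selected interval; "" becomes 0,
#    which no right-open-at-left interval contains, so unknown sizes drop out here.
intervals = pd.IntervalIndex(sorted(NUMERIC_INTERVALS.values()))
sizes = df["Model Size (Million Parameters)"].replace("", 0)
print(sizes.apply(lambda s: any(intervals.contains(s))).tolist())  # [True, False]
```

One caveat worth noting: `filter_data` mutates its `model_sizes` argument via `model_sizes.remove("?")`. Gradio deserializes the checkbox value fresh for each event, so this is harmless in the app, but it matters if the function is reused with a shared list.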
@@ -1865,6 +1925,26 @@ with gr.Blocks(css=css) as block:
     Massive Text Embedding Benchmark (MTEB) Leaderboard. To submit, refer to the <a href="https://github.com/embeddings-benchmark/mteb#leaderboard" target="_blank" style="text-decoration: underline">MTEB GitHub repository</a> 🤗 Refer to the [MTEB paper](https://arxiv.org/abs/2210.07316) for details on metrics, tasks and models.
     """)
 
+    with gr.Row():
+        search_bar = gr.Textbox(
+            label="Search Bar",
+            placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press enter...",
+        )
+        filter_model_type = gr.CheckboxGroup(
+            label="Model types",
+            choices=MODEL_TYPES,
+            value=MODEL_TYPES,
+            interactive=True,
+            elem_classes=["filter-checkbox-group"]
+        )
+        filter_model_sizes = gr.CheckboxGroup(
+            label="Model sizes (in number of parameters)",
+            choices=["?"] + list(NUMERIC_INTERVALS.keys()),
+            value=["?"] + list(NUMERIC_INTERVALS.keys()),
+            interactive=True,
+            elem_classes=["filter-checkbox-group"]
+        )
+
     with gr.Tabs() as outer_tabs:
         # Store the tabs for updating them on load based on URL parameters
         tabs.append(outer_tabs)
@@ -1901,9 +1981,12 @@ with gr.Blocks(css=css) as block:
 
             with gr.Row():
                 datatype = ["number", "markdown"] + ["number"] * len(item["data"])
-                dataframe = gr.Dataframe(item["data"], datatype=datatype, type="pandas", height=…
+                dataframe = gr.Dataframe(item["data"], datatype=datatype, type="pandas", height=500)
                 dataframes.append(dataframe)
 
+                full_dataframe = gr.Dataframe(item["data"], datatype=datatype, type="pandas", visible=False)
+                full_dataframes.append(full_dataframe)
+
             with gr.Row():
                 refresh_button = gr.Button("Refresh")
                 refresh_button.click(item["refresh"], inputs=None, outputs=dataframe)
@@ -1950,6 +2033,10 @@ with gr.Blocks(css=css) as block:
 
     block.load(set_tabs_on_load, inputs=[], outputs=tabs + [current_task_language, language_per_task])
 
+    search_bar.submit(filter_data, inputs=[search_bar, filter_model_type, filter_model_sizes] + full_dataframes, outputs=dataframes)
+    filter_model_type.change(filter_data, inputs=[search_bar, filter_model_type, filter_model_sizes] + full_dataframes, outputs=dataframes)
+    filter_model_sizes.change(filter_data, inputs=[search_bar, filter_model_type, filter_model_sizes] + full_dataframes, outputs=dataframes)
+
 block.queue(max_size=10)
 block.launch()
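The hidden/visible dataframe pair wired up here forms a "recompute from full state" pattern: every search or filter event re-derives each visible table from its untouched hidden copy, so the search, type, and size filters always combine from scratch rather than compounding on already-filtered rows. A minimal sketch under assumed names (one table, search only):

```python
import gradio as gr
import pandas as pd

FULL = pd.DataFrame({"Model": ["model-a", "model-b"], "Average": [61.2, 58.9]})

def filter_rows(query: str, full_df: pd.DataFrame) -> pd.DataFrame:
    # Always start from the full, never-mutated copy.
    return full_df[full_df["Model"].str.contains(query)] if query else full_df

with gr.Blocks() as demo:
    search = gr.Textbox(label="Search")
    full = gr.Dataframe(FULL, visible=False)  # hidden source of truth
    shown = gr.Dataframe(FULL)                # what the user sees
    search.submit(filter_rows, inputs=[search, full], outputs=shown)

demo.launch()
```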