Spaces:
Running
Running
Muennighoff
committed on
Commit
•
4f1ef5f
1
Parent(s):
091482a
Update dataset
Browse files
app.py
CHANGED
@@ -58,11 +58,11 @@ TASK_LIST_CLASSIFICATION_NB = [
|
|
58 |
]
|
59 |
|
60 |
TASK_LIST_CLASSIFICATION_PL = [
|
61 |
-
"AbusiveClauses",
|
62 |
"AllegroReviews",
|
63 |
"CBD",
|
64 |
"MassiveIntentClassification (pl)",
|
65 |
"MassiveScenarioClassification (pl)",
|
|
|
66 |
"PolEmo2.0-IN",
|
67 |
"PolEmo2.0-OUT",
|
68 |
]
|
@@ -304,6 +304,7 @@ EXTERNAL_MODELS = [
|
|
304 |
"gtr-t5-large",
|
305 |
"gtr-t5-xl",
|
306 |
"gtr-t5-xxl",
|
|
|
307 |
"komninos",
|
308 |
"luotuo-bert-medium",
|
309 |
"LASER2",
|
@@ -379,6 +380,7 @@ EXTERNAL_MODEL_TO_LINK = {
|
|
379 |
"gtr-t5-large": "https://huggingface.co/sentence-transformers/gtr-t5-large",
|
380 |
"gtr-t5-xl": "https://huggingface.co/sentence-transformers/gtr-t5-xl",
|
381 |
"gtr-t5-xxl": "https://huggingface.co/sentence-transformers/gtr-t5-xxl",
|
|
|
382 |
"komninos": "https://huggingface.co/sentence-transformers/average_word_embeddings_komninos",
|
383 |
"luotuo-bert-medium": "https://huggingface.co/silk-road/luotuo-bert-medium",
|
384 |
"LASER2": "https://github.com/facebookresearch/LASER",
|
@@ -457,6 +459,7 @@ EXTERNAL_MODEL_TO_DIM = {
|
|
457 |
"gtr-t5-large": 768,
|
458 |
"gtr-t5-xl": 768,
|
459 |
"gtr-t5-xxl": 768,
|
|
|
460 |
"komninos": 300,
|
461 |
"m3e-base": 768,
|
462 |
"m3e-large": 768,
|
@@ -529,6 +532,7 @@ EXTERNAL_MODEL_TO_SEQLEN = {
|
|
529 |
"gtr-t5-large": 512,
|
530 |
"gtr-t5-xl": 512,
|
531 |
"gtr-t5-xxl": 512,
|
|
|
532 |
"komninos": "N/A",
|
533 |
"luotuo-bert-medium": 512,
|
534 |
"LASER2": "N/A",
|
@@ -604,6 +608,7 @@ EXTERNAL_MODEL_TO_SIZE = {
|
|
604 |
"gtr-t5-large": 0.67,
|
605 |
"gtr-t5-xl": 2.48,
|
606 |
"gtr-t5-xxl": 9.73,
|
|
|
607 |
"komninos": 0.27,
|
608 |
"luotuo-bert-medium": 1.31,
|
609 |
"LASER2": 0.17,
|
@@ -810,7 +815,6 @@ def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_
|
|
810 |
# ],
|
811 |
# },
|
812 |
# Use "get" instead of dict indexing to skip incompat metadata instead of erroring out
|
813 |
-
print("RUNNING", model)
|
814 |
if len(datasets) > 0:
|
815 |
task_results = [sub_res for sub_res in meta["model-index"][0]["results"] if (sub_res.get("task", {}).get("type", "") in tasks) and any([x in sub_res.get("dataset", {}).get("name", "") for x in datasets])]
|
816 |
elif langs:
|
|
|
58 |
]
|
59 |
|
60 |
TASK_LIST_CLASSIFICATION_PL = [
|
|
|
61 |
"AllegroReviews",
|
62 |
"CBD",
|
63 |
"MassiveIntentClassification (pl)",
|
64 |
"MassiveScenarioClassification (pl)",
|
65 |
+
"PAC",
|
66 |
"PolEmo2.0-IN",
|
67 |
"PolEmo2.0-OUT",
|
68 |
]
|
|
|
304 |
"gtr-t5-large",
|
305 |
"gtr-t5-xl",
|
306 |
"gtr-t5-xxl",
|
307 |
+
"herbert-base-retrieval-v2",
|
308 |
"komninos",
|
309 |
"luotuo-bert-medium",
|
310 |
"LASER2",
|
|
|
380 |
"gtr-t5-large": "https://huggingface.co/sentence-transformers/gtr-t5-large",
|
381 |
"gtr-t5-xl": "https://huggingface.co/sentence-transformers/gtr-t5-xl",
|
382 |
"gtr-t5-xxl": "https://huggingface.co/sentence-transformers/gtr-t5-xxl",
|
383 |
+
"herbert-base-retrieval-v2": "https://huggingface.co/ipipan/herbert-base-retrieval-v2",
|
384 |
"komninos": "https://huggingface.co/sentence-transformers/average_word_embeddings_komninos",
|
385 |
"luotuo-bert-medium": "https://huggingface.co/silk-road/luotuo-bert-medium",
|
386 |
"LASER2": "https://github.com/facebookresearch/LASER",
|
|
|
459 |
"gtr-t5-large": 768,
|
460 |
"gtr-t5-xl": 768,
|
461 |
"gtr-t5-xxl": 768,
|
462 |
+
"herbert-base-retrieval-v2": 768,
|
463 |
"komninos": 300,
|
464 |
"m3e-base": 768,
|
465 |
"m3e-large": 768,
|
|
|
532 |
"gtr-t5-large": 512,
|
533 |
"gtr-t5-xl": 512,
|
534 |
"gtr-t5-xxl": 512,
|
535 |
+
"herbert-base-retrieval-v2": 514,
|
536 |
"komninos": "N/A",
|
537 |
"luotuo-bert-medium": 512,
|
538 |
"LASER2": "N/A",
|
|
|
608 |
"gtr-t5-large": 0.67,
|
609 |
"gtr-t5-xl": 2.48,
|
610 |
"gtr-t5-xxl": 9.73,
|
611 |
+
"herbert-base-retrieval-v2": 0.50,
|
612 |
"komninos": 0.27,
|
613 |
"luotuo-bert-medium": 1.31,
|
614 |
"LASER2": 0.17,
|
|
|
815 |
# ],
|
816 |
# },
|
817 |
# Use "get" instead of dict indexing to skip incompat metadata instead of erroring out
|
|
|
818 |
if len(datasets) > 0:
|
819 |
task_results = [sub_res for sub_res in meta["model-index"][0]["results"] if (sub_res.get("task", {}).get("type", "") in tasks) and any([x in sub_res.get("dataset", {}).get("name", "") for x in datasets])]
|
820 |
elif langs:
|