Spaces:
Running
Running
Muennighoff
committed on
Commit
•
bdf66dc
1
Parent(s):
d32c2e5
Update OpusparcusPC & LLM2Vec
Browse files
- EXTERNAL_MODEL_RESULTS.json +0 -0
- app.py +28 -27
EXTERNAL_MODEL_RESULTS.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
app.py
CHANGED
@@ -340,13 +340,13 @@ EXTERNAL_MODELS = [
|
|
340 |
"Cohere-embed-multilingual-light-v3.0",
|
341 |
"DanskBERT",
|
342 |
"LASER2",
|
|
|
|
|
|
|
|
|
|
|
|
|
343 |
"LaBSE",
|
344 |
-
# "LLM2Vec-Llama-supervised",
|
345 |
-
# "LLM2Vec-Llama-unsupervised",
|
346 |
-
# "LLM2Vec-Mistral-supervised",
|
347 |
-
# "LLM2Vec-Mistral-unsupervised",
|
348 |
-
# "LLM2Vec-Sheared-Llama-supervised",
|
349 |
-
# "LLM2Vec-Sheared-Llama-unsupervised",
|
350 |
"OpenSearch-text-hybrid",
|
351 |
"all-MiniLM-L12-v2",
|
352 |
"all-MiniLM-L6-v2",
|
@@ -456,21 +456,25 @@ EXTERNAL_MODELS = [
|
|
456 |
]
|
457 |
|
458 |
EXTERNAL_MODEL_TO_LINK = {
|
|
|
459 |
"Cohere-embed-english-v3.0": "https://huggingface.co/Cohere/Cohere-embed-english-v3.0",
|
460 |
"Cohere-embed-multilingual-v3.0": "https://huggingface.co/Cohere/Cohere-embed-multilingual-v3.0",
|
461 |
"Cohere-embed-multilingual-light-v3.0": "https://huggingface.co/Cohere/Cohere-embed-multilingual-light-v3.0",
|
|
|
|
|
462 |
"LLM2Vec-Llama-supervised": "https://huggingface.co/McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp-supervised",
|
463 |
"LLM2Vec-Llama-unsupervised": "https://huggingface.co/McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp",
|
464 |
"LLM2Vec-Mistral-supervised": "https://huggingface.co/McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-supervised",
|
465 |
"LLM2Vec-Mistral-unsupervised": "https://huggingface.co/McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp",
|
466 |
"LLM2Vec-Sheared-Llama-supervised": "https://huggingface.co/McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-supervised",
|
467 |
"LLM2Vec-Sheared-Llama-unsupervised": "https://huggingface.co/McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp",
|
|
|
|
|
468 |
"allenai-specter": "https://huggingface.co/sentence-transformers/allenai-specter",
|
469 |
"allenai-specter": "https://huggingface.co/sentence-transformers/allenai-specter",
|
470 |
"all-MiniLM-L12-v2": "https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2",
|
471 |
"all-MiniLM-L6-v2": "https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2",
|
472 |
"all-mpnet-base-v2": "https://huggingface.co/sentence-transformers/all-mpnet-base-v2",
|
473 |
-
"Baichuan-text-embedding": "https://platform.baichuan-ai.com/docs/text-Embedding",
|
474 |
"bert-base-10lang-cased": "https://huggingface.co/Geotrend/bert-base-10lang-cased",
|
475 |
"bert-base-15lang-cased": "https://huggingface.co/Geotrend/bert-base-15lang-cased",
|
476 |
"bert-base-25lang-cased": "https://huggingface.co/Geotrend/bert-base-25lang-cased",
|
@@ -487,7 +491,6 @@ EXTERNAL_MODEL_TO_LINK = {
|
|
487 |
"camembert-large": "https://huggingface.co/almanach/camembert-large",
|
488 |
"contriever-base-msmarco": "https://huggingface.co/nthakur/contriever-base-msmarco",
|
489 |
"cross-en-de-roberta-sentence-transformer": "https://huggingface.co/T-Systems-onsite/cross-en-de-roberta-sentence-transformer",
|
490 |
-
"DanskBERT": "https://huggingface.co/vesteinn/DanskBERT",
|
491 |
"distilbert-base-25lang-cased": "https://huggingface.co/Geotrend/distilbert-base-25lang-cased",
|
492 |
"distilbert-base-en-fr-cased": "https://huggingface.co/Geotrend/distilbert-base-en-fr-cased",
|
493 |
"distilbert-base-en-fr-es-pt-it-cased": "https://huggingface.co/Geotrend/distilbert-base-en-fr-es-pt-it-cased",
|
@@ -520,8 +523,6 @@ EXTERNAL_MODEL_TO_LINK = {
|
|
520 |
"herbert-base-retrieval-v2": "https://huggingface.co/ipipan/herbert-base-retrieval-v2",
|
521 |
"komninos": "https://huggingface.co/sentence-transformers/average_word_embeddings_komninos",
|
522 |
"luotuo-bert-medium": "https://huggingface.co/silk-road/luotuo-bert-medium",
|
523 |
-
"LASER2": "https://github.com/facebookresearch/LASER",
|
524 |
-
"LaBSE": "https://huggingface.co/sentence-transformers/LaBSE",
|
525 |
"m3e-base": "https://huggingface.co/moka-ai/m3e-base",
|
526 |
"m3e-large": "https://huggingface.co/moka-ai/m3e-large",
|
527 |
"mistral-embed": "https://docs.mistral.ai/guides/embeddings",
|
@@ -538,7 +539,6 @@ EXTERNAL_MODEL_TO_LINK = {
|
|
538 |
"nomic-embed-text-v1.5-512": "https://huggingface.co/nomic-ai/nomic-embed-text-v1.5",
|
539 |
"norbert3-base": "https://huggingface.co/ltg/norbert3-base",
|
540 |
"norbert3-large": "https://huggingface.co/ltg/norbert3-large",
|
541 |
-
"OpenSearch-text-hybrid": "https://help.aliyun.com/zh/open-search/vector-search-edition/hybrid-retrieval",
|
542 |
"paraphrase-multilingual-mpnet-base-v2": "https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
|
543 |
"paraphrase-multilingual-MiniLM-L12-v2": "https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
|
544 |
"sentence-camembert-base": "https://huggingface.co/dangvantuan/sentence-camembert-base",
|
@@ -586,20 +586,23 @@ EXTERNAL_MODEL_TO_LINK = {
|
|
586 |
}
|
587 |
|
588 |
EXTERNAL_MODEL_TO_DIM = {
|
|
|
589 |
"Cohere-embed-english-v3.0": 1024,
|
590 |
"Cohere-embed-multilingual-v3.0": 1024,
|
591 |
"Cohere-embed-multilingual-light-v3.0": 384,
|
|
|
|
|
592 |
"LLM2Vec-Llama-supervised": 4096,
|
593 |
"LLM2Vec-Llama-unsupervised": 4096,
|
594 |
"LLM2Vec-Mistral-supervised": 4096,
|
595 |
"LLM2Vec-Mistral-unsupervised": 4096,
|
596 |
"LLM2Vec-Sheared-Llama-supervised": 2048,
|
597 |
"LLM2Vec-Sheared-Llama-unsupervised": 2048,
|
|
|
598 |
"all-MiniLM-L12-v2": 384,
|
599 |
"all-MiniLM-L6-v2": 384,
|
600 |
"all-mpnet-base-v2": 768,
|
601 |
"allenai-specter": 768,
|
602 |
-
"Baichuan-text-embedding": 1024,
|
603 |
"bert-base-10lang-cased": 768,
|
604 |
"bert-base-15lang-cased": 768,
|
605 |
"bert-base-25lang-cased": 768,
|
@@ -616,7 +619,6 @@ EXTERNAL_MODEL_TO_DIM = {
|
|
616 |
"camembert-large": 768,
|
617 |
"contriever-base-msmarco": 768,
|
618 |
"cross-en-de-roberta-sentence-transformer": 768,
|
619 |
-
"DanskBERT": 768,
|
620 |
"distilbert-base-25lang-cased": 768,
|
621 |
"distilbert-base-en-fr-cased": 768,
|
622 |
"distilbert-base-en-fr-es-pt-it-cased": 768,
|
@@ -635,8 +637,6 @@ EXTERNAL_MODEL_TO_DIM = {
|
|
635 |
"flaubert_base_uncased": 768,
|
636 |
"flaubert_large_cased": 1024,
|
637 |
"luotuo-bert-medium": 768,
|
638 |
-
"LASER2": 1024,
|
639 |
-
"LaBSE": 768,
|
640 |
"gbert-base": 768,
|
641 |
"gbert-large": 1024,
|
642 |
"gelectra-base": 768,
|
@@ -715,20 +715,23 @@ EXTERNAL_MODEL_TO_DIM = {
|
|
715 |
}
|
716 |
|
717 |
EXTERNAL_MODEL_TO_SEQLEN = {
|
|
|
718 |
"Cohere-embed-english-v3.0": 512,
|
719 |
"Cohere-embed-multilingual-v3.0": 512,
|
720 |
-
"Cohere-embed-multilingual-light-v3.0": 512,
|
|
|
|
|
721 |
"LLM2Vec-Llama-supervised": 512,
|
722 |
"LLM2Vec-Llama-unsupervised": 512,
|
723 |
"LLM2Vec-Mistral-supervised": 512,
|
724 |
"LLM2Vec-Mistral-unsupervised": 512,
|
725 |
"LLM2Vec-Sheared-Llama-supervised": 512,
|
726 |
"LLM2Vec-Sheared-Llama-unsupervised": 512,
|
|
|
727 |
"all-MiniLM-L12-v2": 512,
|
728 |
"all-MiniLM-L6-v2": 512,
|
729 |
"all-mpnet-base-v2": 514,
|
730 |
"allenai-specter": 512,
|
731 |
-
"Baichuan-text-embedding": 512,
|
732 |
"bert-base-10lang-cased": 512,
|
733 |
"bert-base-15lang-cased": 512,
|
734 |
"bert-base-25lang-cased": 512,
|
@@ -749,8 +752,7 @@ EXTERNAL_MODEL_TO_SEQLEN = {
|
|
749 |
"distilbert-base-en-fr-cased": 512,
|
750 |
"distilbert-base-en-fr-es-pt-it-cased": 512,
|
751 |
"distilbert-base-fr-cased": 512,
|
752 |
-
"distilbert-base-uncased": 512,
|
753 |
-
"DanskBERT": 514,
|
754 |
"dfm-encoder-large-v1": 512,
|
755 |
"dfm-sentence-encoder-large-1": 512,
|
756 |
"distiluse-base-multilingual-cased-v2": 512,
|
@@ -778,8 +780,6 @@ EXTERNAL_MODEL_TO_SEQLEN = {
|
|
778 |
"herbert-base-retrieval-v2": 514,
|
779 |
"komninos": "N/A",
|
780 |
"luotuo-bert-medium": 512,
|
781 |
-
"LASER2": "N/A",
|
782 |
-
"LaBSE": 512,
|
783 |
"m3e-base": 512,
|
784 |
"m3e-large": 512,
|
785 |
# "mistral-embed": "?",
|
@@ -844,12 +844,15 @@ EXTERNAL_MODEL_TO_SEQLEN = {
|
|
844 |
}
|
845 |
|
846 |
EXTERNAL_MODEL_TO_SIZE = {
|
|
|
|
|
847 |
"LLM2Vec-Llama-supervised": 6607,
|
848 |
"LLM2Vec-Llama-unsupervised": 6607,
|
849 |
"LLM2Vec-Mistral-supervised": 7111,
|
850 |
"LLM2Vec-Mistral-unsupervised": 7111,
|
851 |
"LLM2Vec-Sheared-Llama-supervised": 1280,
|
852 |
"LLM2Vec-Sheared-Llama-unsupervised": 1280,
|
|
|
853 |
"allenai-specter": 110,
|
854 |
"all-MiniLM-L12-v2": 33,
|
855 |
"all-MiniLM-L6-v2": 23,
|
@@ -874,7 +877,6 @@ EXTERNAL_MODEL_TO_SIZE = {
|
|
874 |
"distilbert-base-en-fr-es-pt-it-cased": 110,
|
875 |
"distilbert-base-fr-cased": 110,
|
876 |
"distilbert-base-uncased": 110,
|
877 |
-
"DanskBERT": 125,
|
878 |
"distiluse-base-multilingual-cased-v2": 135,
|
879 |
"dfm-encoder-large-v1": 355,
|
880 |
"dfm-sentence-encoder-large-1": 355,
|
@@ -901,9 +903,7 @@ EXTERNAL_MODEL_TO_SIZE = {
|
|
901 |
"gtr-t5-xxl": 4865,
|
902 |
"herbert-base-retrieval-v2": 125,
|
903 |
"komninos": 134,
|
904 |
-
"luotuo-bert-medium": 328,
|
905 |
-
"LASER2": 43,
|
906 |
-
"LaBSE": 471,
|
907 |
"m3e-base": 102,
|
908 |
"m3e-large": 102,
|
909 |
"msmarco-bert-co-condensor": 110,
|
@@ -944,12 +944,12 @@ EXTERNAL_MODEL_TO_SIZE = {
|
|
944 |
}
|
945 |
|
946 |
PROPRIETARY_MODELS = {
|
|
|
947 |
"Cohere-embed-english-v3.0",
|
948 |
"Cohere-embed-multilingual-v3.0",
|
949 |
"Cohere-embed-multilingual-light-v3.0",
|
950 |
-
"Baichuan-text-embedding",
|
951 |
-
"mistral-embed",
|
952 |
"OpenSearch-text-hybrid",
|
|
|
953 |
"text-embedding-3-small",
|
954 |
"text-embedding-3-large",
|
955 |
"text-embedding-3-large-256",
|
@@ -973,6 +973,7 @@ PROPRIETARY_MODELS = {
|
|
973 |
"google-gecko.text-embedding-preview-0409",
|
974 |
"google-gecko-256.text-embedding-preview-0409",
|
975 |
}
|
|
|
976 |
PROPRIETARY_MODELS = {
|
977 |
make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, "https://huggingface.co/spaces/mteb/leaderboard"))
|
978 |
for model in PROPRIETARY_MODELS
|
|
|
340 |
"Cohere-embed-multilingual-light-v3.0",
|
341 |
"DanskBERT",
|
342 |
"LASER2",
|
343 |
+
"LLM2Vec-Llama-supervised",
|
344 |
+
"LLM2Vec-Llama-unsupervised",
|
345 |
+
"LLM2Vec-Mistral-supervised",
|
346 |
+
"LLM2Vec-Mistral-unsupervised",
|
347 |
+
"LLM2Vec-Sheared-Llama-supervised",
|
348 |
+
"LLM2Vec-Sheared-Llama-unsupervised",
|
349 |
"LaBSE",
|
|
|
|
|
|
|
|
|
|
|
|
|
350 |
"OpenSearch-text-hybrid",
|
351 |
"all-MiniLM-L12-v2",
|
352 |
"all-MiniLM-L6-v2",
|
|
|
456 |
]
|
457 |
|
458 |
EXTERNAL_MODEL_TO_LINK = {
|
459 |
+
"Baichuan-text-embedding": "https://platform.baichuan-ai.com/docs/text-Embedding",
|
460 |
"Cohere-embed-english-v3.0": "https://huggingface.co/Cohere/Cohere-embed-english-v3.0",
|
461 |
"Cohere-embed-multilingual-v3.0": "https://huggingface.co/Cohere/Cohere-embed-multilingual-v3.0",
|
462 |
"Cohere-embed-multilingual-light-v3.0": "https://huggingface.co/Cohere/Cohere-embed-multilingual-light-v3.0",
|
463 |
+
"DanskBERT": "https://huggingface.co/vesteinn/DanskBERT",
|
464 |
+
"LASER2": "https://github.com/facebookresearch/LASER",
|
465 |
"LLM2Vec-Llama-supervised": "https://huggingface.co/McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp-supervised",
|
466 |
"LLM2Vec-Llama-unsupervised": "https://huggingface.co/McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp",
|
467 |
"LLM2Vec-Mistral-supervised": "https://huggingface.co/McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-supervised",
|
468 |
"LLM2Vec-Mistral-unsupervised": "https://huggingface.co/McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp",
|
469 |
"LLM2Vec-Sheared-Llama-supervised": "https://huggingface.co/McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-supervised",
|
470 |
"LLM2Vec-Sheared-Llama-unsupervised": "https://huggingface.co/McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp",
|
471 |
+
"LaBSE": "https://huggingface.co/sentence-transformers/LaBSE",
|
472 |
+
"OpenSearch-text-hybrid": "https://help.aliyun.com/zh/open-search/vector-search-edition/hybrid-retrieval",
|
473 |
"allenai-specter": "https://huggingface.co/sentence-transformers/allenai-specter",
|
474 |
"allenai-specter": "https://huggingface.co/sentence-transformers/allenai-specter",
|
475 |
"all-MiniLM-L12-v2": "https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2",
|
476 |
"all-MiniLM-L6-v2": "https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2",
|
477 |
"all-mpnet-base-v2": "https://huggingface.co/sentence-transformers/all-mpnet-base-v2",
|
|
|
478 |
"bert-base-10lang-cased": "https://huggingface.co/Geotrend/bert-base-10lang-cased",
|
479 |
"bert-base-15lang-cased": "https://huggingface.co/Geotrend/bert-base-15lang-cased",
|
480 |
"bert-base-25lang-cased": "https://huggingface.co/Geotrend/bert-base-25lang-cased",
|
|
|
491 |
"camembert-large": "https://huggingface.co/almanach/camembert-large",
|
492 |
"contriever-base-msmarco": "https://huggingface.co/nthakur/contriever-base-msmarco",
|
493 |
"cross-en-de-roberta-sentence-transformer": "https://huggingface.co/T-Systems-onsite/cross-en-de-roberta-sentence-transformer",
|
|
|
494 |
"distilbert-base-25lang-cased": "https://huggingface.co/Geotrend/distilbert-base-25lang-cased",
|
495 |
"distilbert-base-en-fr-cased": "https://huggingface.co/Geotrend/distilbert-base-en-fr-cased",
|
496 |
"distilbert-base-en-fr-es-pt-it-cased": "https://huggingface.co/Geotrend/distilbert-base-en-fr-es-pt-it-cased",
|
|
|
523 |
"herbert-base-retrieval-v2": "https://huggingface.co/ipipan/herbert-base-retrieval-v2",
|
524 |
"komninos": "https://huggingface.co/sentence-transformers/average_word_embeddings_komninos",
|
525 |
"luotuo-bert-medium": "https://huggingface.co/silk-road/luotuo-bert-medium",
|
|
|
|
|
526 |
"m3e-base": "https://huggingface.co/moka-ai/m3e-base",
|
527 |
"m3e-large": "https://huggingface.co/moka-ai/m3e-large",
|
528 |
"mistral-embed": "https://docs.mistral.ai/guides/embeddings",
|
|
|
539 |
"nomic-embed-text-v1.5-512": "https://huggingface.co/nomic-ai/nomic-embed-text-v1.5",
|
540 |
"norbert3-base": "https://huggingface.co/ltg/norbert3-base",
|
541 |
"norbert3-large": "https://huggingface.co/ltg/norbert3-large",
|
|
|
542 |
"paraphrase-multilingual-mpnet-base-v2": "https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
|
543 |
"paraphrase-multilingual-MiniLM-L12-v2": "https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
|
544 |
"sentence-camembert-base": "https://huggingface.co/dangvantuan/sentence-camembert-base",
|
|
|
586 |
}
|
587 |
|
588 |
EXTERNAL_MODEL_TO_DIM = {
|
589 |
+
"Baichuan-text-embedding": 1024,
|
590 |
"Cohere-embed-english-v3.0": 1024,
|
591 |
"Cohere-embed-multilingual-v3.0": 1024,
|
592 |
"Cohere-embed-multilingual-light-v3.0": 384,
|
593 |
+
"DanskBERT": 768,
|
594 |
+
"LASER2": 1024,
|
595 |
"LLM2Vec-Llama-supervised": 4096,
|
596 |
"LLM2Vec-Llama-unsupervised": 4096,
|
597 |
"LLM2Vec-Mistral-supervised": 4096,
|
598 |
"LLM2Vec-Mistral-unsupervised": 4096,
|
599 |
"LLM2Vec-Sheared-Llama-supervised": 2048,
|
600 |
"LLM2Vec-Sheared-Llama-unsupervised": 2048,
|
601 |
+
"LaBSE": 768,
|
602 |
"all-MiniLM-L12-v2": 384,
|
603 |
"all-MiniLM-L6-v2": 384,
|
604 |
"all-mpnet-base-v2": 768,
|
605 |
"allenai-specter": 768,
|
|
|
606 |
"bert-base-10lang-cased": 768,
|
607 |
"bert-base-15lang-cased": 768,
|
608 |
"bert-base-25lang-cased": 768,
|
|
|
619 |
"camembert-large": 768,
|
620 |
"contriever-base-msmarco": 768,
|
621 |
"cross-en-de-roberta-sentence-transformer": 768,
|
|
|
622 |
"distilbert-base-25lang-cased": 768,
|
623 |
"distilbert-base-en-fr-cased": 768,
|
624 |
"distilbert-base-en-fr-es-pt-it-cased": 768,
|
|
|
637 |
"flaubert_base_uncased": 768,
|
638 |
"flaubert_large_cased": 1024,
|
639 |
"luotuo-bert-medium": 768,
|
|
|
|
|
640 |
"gbert-base": 768,
|
641 |
"gbert-large": 1024,
|
642 |
"gelectra-base": 768,
|
|
|
715 |
}
|
716 |
|
717 |
EXTERNAL_MODEL_TO_SEQLEN = {
|
718 |
+
"Baichuan-text-embedding": 512,
|
719 |
"Cohere-embed-english-v3.0": 512,
|
720 |
"Cohere-embed-multilingual-v3.0": 512,
|
721 |
+
"Cohere-embed-multilingual-light-v3.0": 512,
|
722 |
+
"DanskBERT": 514,
|
723 |
+
"LASER2": "N/A",
|
724 |
"LLM2Vec-Llama-supervised": 512,
|
725 |
"LLM2Vec-Llama-unsupervised": 512,
|
726 |
"LLM2Vec-Mistral-supervised": 512,
|
727 |
"LLM2Vec-Mistral-unsupervised": 512,
|
728 |
"LLM2Vec-Sheared-Llama-supervised": 512,
|
729 |
"LLM2Vec-Sheared-Llama-unsupervised": 512,
|
730 |
+
"LaBSE": 512,
|
731 |
"all-MiniLM-L12-v2": 512,
|
732 |
"all-MiniLM-L6-v2": 512,
|
733 |
"all-mpnet-base-v2": 514,
|
734 |
"allenai-specter": 512,
|
|
|
735 |
"bert-base-10lang-cased": 512,
|
736 |
"bert-base-15lang-cased": 512,
|
737 |
"bert-base-25lang-cased": 512,
|
|
|
752 |
"distilbert-base-en-fr-cased": 512,
|
753 |
"distilbert-base-en-fr-es-pt-it-cased": 512,
|
754 |
"distilbert-base-fr-cased": 512,
|
755 |
+
"distilbert-base-uncased": 512,
|
|
|
756 |
"dfm-encoder-large-v1": 512,
|
757 |
"dfm-sentence-encoder-large-1": 512,
|
758 |
"distiluse-base-multilingual-cased-v2": 512,
|
|
|
780 |
"herbert-base-retrieval-v2": 514,
|
781 |
"komninos": "N/A",
|
782 |
"luotuo-bert-medium": 512,
|
|
|
|
|
783 |
"m3e-base": 512,
|
784 |
"m3e-large": 512,
|
785 |
# "mistral-embed": "?",
|
|
|
844 |
}
|
845 |
|
846 |
EXTERNAL_MODEL_TO_SIZE = {
|
847 |
+
"DanskBERT": 125,
|
848 |
+
"LASER2": 43,
|
849 |
"LLM2Vec-Llama-supervised": 6607,
|
850 |
"LLM2Vec-Llama-unsupervised": 6607,
|
851 |
"LLM2Vec-Mistral-supervised": 7111,
|
852 |
"LLM2Vec-Mistral-unsupervised": 7111,
|
853 |
"LLM2Vec-Sheared-Llama-supervised": 1280,
|
854 |
"LLM2Vec-Sheared-Llama-unsupervised": 1280,
|
855 |
+
"LaBSE": 471,
|
856 |
"allenai-specter": 110,
|
857 |
"all-MiniLM-L12-v2": 33,
|
858 |
"all-MiniLM-L6-v2": 23,
|
|
|
877 |
"distilbert-base-en-fr-es-pt-it-cased": 110,
|
878 |
"distilbert-base-fr-cased": 110,
|
879 |
"distilbert-base-uncased": 110,
|
|
|
880 |
"distiluse-base-multilingual-cased-v2": 135,
|
881 |
"dfm-encoder-large-v1": 355,
|
882 |
"dfm-sentence-encoder-large-1": 355,
|
|
|
903 |
"gtr-t5-xxl": 4865,
|
904 |
"herbert-base-retrieval-v2": 125,
|
905 |
"komninos": 134,
|
906 |
+
"luotuo-bert-medium": 328,
|
|
|
|
|
907 |
"m3e-base": 102,
|
908 |
"m3e-large": 102,
|
909 |
"msmarco-bert-co-condensor": 110,
|
|
|
944 |
}
|
945 |
|
946 |
PROPRIETARY_MODELS = {
|
947 |
+
"Baichuan-text-embedding",
|
948 |
"Cohere-embed-english-v3.0",
|
949 |
"Cohere-embed-multilingual-v3.0",
|
950 |
"Cohere-embed-multilingual-light-v3.0",
|
|
|
|
|
951 |
"OpenSearch-text-hybrid",
|
952 |
+
"mistral-embed",
|
953 |
"text-embedding-3-small",
|
954 |
"text-embedding-3-large",
|
955 |
"text-embedding-3-large-256",
|
|
|
973 |
"google-gecko.text-embedding-preview-0409",
|
974 |
"google-gecko-256.text-embedding-preview-0409",
|
975 |
}
|
976 |
+
|
977 |
PROPRIETARY_MODELS = {
|
978 |
make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, "https://huggingface.co/spaces/mteb/leaderboard"))
|
979 |
for model in PROPRIETARY_MODELS
|