Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Tom Aarsen
committed on
Commit
•
5613607
1
Parent(s):
e82960d
Use separate proprietary models list
Browse files
app.py
CHANGED
@@ -874,11 +874,42 @@ EXTERNAL_MODEL_TO_SIZE = {
|
|
874 |
"text2vec-large-chinese": 326,
|
875 |
"unsup-simcse-bert-base-uncased": 110,
|
876 |
"use-cmlm-multilingual": 472,
|
877 |
-
|
878 |
"xlm-roberta-base": 279,
|
879 |
"xlm-roberta-large": 560,
|
880 |
}
|
881 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
882 |
MODELS_TO_SKIP = {
|
883 |
"baseplate/instructor-large-1", # Duplicate
|
884 |
"radames/e5-large", # Duplicate
|
@@ -1996,7 +2027,7 @@ def filter_data(search_query, model_types, model_sizes, *full_dataframes):
|
|
1996 |
if model_type == "Open":
|
1997 |
masks.append(df["Model Size (Million Parameters)"] != "")
|
1998 |
elif model_type == "Proprietary":
|
1999 |
-
masks.append(df["Model Size (Million Parameters)"] == "")
|
2000 |
elif model_type == "Sentence Transformers":
|
2001 |
masks.append(df["Model"].isin(SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS))
|
2002 |
if masks:
|
|
|
874 |
"text2vec-large-chinese": 326,
|
875 |
"unsup-simcse-bert-base-uncased": 110,
|
876 |
"use-cmlm-multilingual": 472,
|
877 |
+
"voyage-lite-02-instruct": 1220,
|
878 |
"xlm-roberta-base": 279,
|
879 |
"xlm-roberta-large": 560,
|
880 |
}
|
881 |
|
882 |
+
PROPRIETARY_MODELS = {
|
883 |
+
"Cohere-embed-multilingual-v3.0",
|
884 |
+
"Cohere-embed-multilingual-light-v3.0",
|
885 |
+
"Baichuan-text-embedding",
|
886 |
+
"mistral-embed",
|
887 |
+
"OpenSearch-text-hybrid",
|
888 |
+
"text-embedding-3-small",
|
889 |
+
"text-embedding-3-large",
|
890 |
+
"text-embedding-3-large-256",
|
891 |
+
"text-embedding-ada-002",
|
892 |
+
"text-similarity-ada-001",
|
893 |
+
"text-similarity-babbage-001",
|
894 |
+
"text-similarity-curie-001",
|
895 |
+
"text-similarity-davinci-001",
|
896 |
+
"text-search-ada-doc-001",
|
897 |
+
"text-search-ada-query-001",
|
898 |
+
"text-search-ada-001",
|
899 |
+
"text-search-curie-001",
|
900 |
+
"text-search-babbage-001",
|
901 |
+
"text-search-davinci-001",
|
902 |
+
"titan-embed-text-v1",
|
903 |
+
"voyage-2",
|
904 |
+
"voyage-code-2",
|
905 |
+
"voyage-lite-01-instruct",
|
906 |
+
"voyage-lite-02-instruct",
|
907 |
+
}
|
908 |
+
PROPRIETARY_MODELS = {
|
909 |
+
make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, "https://huggingface.co/spaces/mteb/leaderboard"))
|
910 |
+
for model in PROPRIETARY_MODELS
|
911 |
+
}
|
912 |
+
|
913 |
MODELS_TO_SKIP = {
|
914 |
"baseplate/instructor-large-1", # Duplicate
|
915 |
"radames/e5-large", # Duplicate
|
|
|
2027 |
if model_type == "Open":
|
2028 |
masks.append(df["Model Size (Million Parameters)"] != "")
|
2029 |
elif model_type == "Proprietary":
|
2030 |
+
masks.append(df["Model"].isin(PROPRIETARY_MODELS))
|
2031 |
elif model_type == "Sentence Transformers":
|
2032 |
masks.append(df["Model"].isin(SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS))
|
2033 |
if masks:
|