Tom Aarsen committed on
Commit
5613607
1 Parent(s): e82960d

Use separate proprietary models list

Browse files
Files changed (1) hide show
  1. app.py +33 -2
app.py CHANGED
@@ -874,11 +874,42 @@ EXTERNAL_MODEL_TO_SIZE = {
874
  "text2vec-large-chinese": 326,
875
  "unsup-simcse-bert-base-uncased": 110,
876
  "use-cmlm-multilingual": 472,
877
- # "voyage-lite-02-instruct": 613, # <- Removed as we use unknown sizes to mark API models
878
  "xlm-roberta-base": 279,
879
  "xlm-roberta-large": 560,
880
  }
881
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
882
  MODELS_TO_SKIP = {
883
  "baseplate/instructor-large-1", # Duplicate
884
  "radames/e5-large", # Duplicate
@@ -1996,7 +2027,7 @@ def filter_data(search_query, model_types, model_sizes, *full_dataframes):
1996
  if model_type == "Open":
1997
  masks.append(df["Model Size (Million Parameters)"] != "")
1998
  elif model_type == "Proprietary":
1999
- masks.append(df["Model Size (Million Parameters)"] == "")
2000
  elif model_type == "Sentence Transformers":
2001
  masks.append(df["Model"].isin(SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS))
2002
  if masks:
 
874
  "text2vec-large-chinese": 326,
875
  "unsup-simcse-bert-base-uncased": 110,
876
  "use-cmlm-multilingual": 472,
877
+ "voyage-lite-02-instruct": 1220,
878
  "xlm-roberta-base": 279,
879
  "xlm-roberta-large": 560,
880
  }
881
 
882
+ PROPRIETARY_MODELS = {  # Bare names of the models selected by the "Proprietary" model-type filter.
883
+ "Cohere-embed-multilingual-v3.0",
884
+ "Cohere-embed-multilingual-light-v3.0",
885
+ "Baichuan-text-embedding",
886
+ "mistral-embed",
887
+ "OpenSearch-text-hybrid",
888
+ "text-embedding-3-small",
889
+ "text-embedding-3-large",
890
+ "text-embedding-3-large-256",
891
+ "text-embedding-ada-002",
892
+ "text-similarity-ada-001",
893
+ "text-similarity-babbage-001",
894
+ "text-similarity-curie-001",
895
+ "text-similarity-davinci-001",
896
+ "text-search-ada-doc-001",
897
+ "text-search-ada-query-001",
898
+ "text-search-ada-001",
899
+ "text-search-curie-001",
900
+ "text-search-babbage-001",
901
+ "text-search-davinci-001",
902
+ "titan-embed-text-v1",
903
+ "voyage-2",
904
+ "voyage-code-2",
905
+ "voyage-lite-01-instruct",
906
+ "voyage-lite-02-instruct",
907
+ }
908
+ PROPRIETARY_MODELS = {  # Rebind the name: each bare model name is replaced by its make_clickable_model(...) result.
909
+ make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, "https://huggingface.co/spaces/mteb/leaderboard"))  # leaderboard URL is the fallback when the model has no EXTERNAL_MODEL_TO_LINK entry
910
+ for model in PROPRIETARY_MODELS  # iterates the bare-name set above; NOTE(review): presumably done so entries match the values stored in df["Model"] — confirm
911
+ }
912
+
913
  MODELS_TO_SKIP = {
914
  "baseplate/instructor-large-1", # Duplicate
915
  "radames/e5-large", # Duplicate
 
2027
  if model_type == "Open":
2028
  masks.append(df["Model Size (Million Parameters)"] != "")
2029
  elif model_type == "Proprietary":
2030
+ masks.append(df["Model"].isin(PROPRIETARY_MODELS))
2031
  elif model_type == "Sentence Transformers":
2032
  masks.append(df["Model"].isin(SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS))
2033
  if masks: