Spaces:
Running
Running
Muennighoff
committed on
Commit
•
e556bec
1
Parent(s):
4e8ec86
BGE v1.5
Browse files
app.py
CHANGED
@@ -279,10 +279,10 @@ EXTERNAL_MODELS = [
|
|
279 |
"allenai-specter",
|
280 |
"bert-base-swedish-cased",
|
281 |
"bert-base-uncased",
|
282 |
-
"bge-base-zh",
|
283 |
-
"bge-large-zh",
|
284 |
"bge-large-zh-noinstruct",
|
285 |
-
"bge-small-zh",
|
286 |
"contriever-base-msmarco",
|
287 |
"cross-en-de-roberta-sentence-transformer",
|
288 |
"dfm-encoder-large-v1",
|
@@ -355,10 +355,10 @@ EXTERNAL_MODEL_TO_LINK = {
|
|
355 |
"all-mpnet-base-v2": "https://huggingface.co/sentence-transformers/all-mpnet-base-v2",
|
356 |
"bert-base-swedish-cased": "https://huggingface.co/KB/bert-base-swedish-cased",
|
357 |
"bert-base-uncased": "https://huggingface.co/bert-base-uncased",
|
358 |
-
"bge-base-zh": "https://huggingface.co/BAAI/bge-base-zh",
|
359 |
-
"bge-large-zh": "https://huggingface.co/BAAI/bge-large-zh",
|
360 |
"bge-large-zh-noinstruct": "https://huggingface.co/BAAI/bge-large-zh-noinstruct",
|
361 |
-
"bge-small-zh": "https://huggingface.co/BAAI/bge-small-zh",
|
362 |
"contriever-base-msmarco": "https://huggingface.co/nthakur/contriever-base-msmarco",
|
363 |
"cross-en-de-roberta-sentence-transformer": "https://huggingface.co/T-Systems-onsite/cross-en-de-roberta-sentence-transformer",
|
364 |
"DanskBERT": "https://huggingface.co/vesteinn/DanskBERT",
|
@@ -431,10 +431,10 @@ EXTERNAL_MODEL_TO_DIM = {
|
|
431 |
"allenai-specter": 768,
|
432 |
"bert-base-swedish-cased": 768,
|
433 |
"bert-base-uncased": 768,
|
434 |
-
"bge-base-zh": 768,
|
435 |
-
"bge-large-zh": 1024,
|
436 |
"bge-large-zh-noinstruct": 1024,
|
437 |
-
"bge-small-zh": 512,
|
438 |
"contriever-base-msmarco": 768,
|
439 |
"cross-en-de-roberta-sentence-transformer": 768,
|
440 |
"DanskBERT": 768,
|
@@ -507,10 +507,10 @@ EXTERNAL_MODEL_TO_SEQLEN = {
|
|
507 |
"allenai-specter": 512,
|
508 |
"bert-base-swedish-cased": 512,
|
509 |
"bert-base-uncased": 512,
|
510 |
-
"bge-base-zh": 512,
|
511 |
-
"bge-large-zh": 512,
|
512 |
"bge-large-zh-noinstruct": 512,
|
513 |
-
"bge-small-zh": 512,
|
514 |
"contriever-base-msmarco": 512,
|
515 |
"cross-en-de-roberta-sentence-transformer": 514,
|
516 |
"DanskBERT": 514,
|
@@ -583,10 +583,10 @@ EXTERNAL_MODEL_TO_SIZE = {
|
|
583 |
"all-mpnet-base-v2": 0.44,
|
584 |
"bert-base-uncased": 0.44,
|
585 |
"bert-base-swedish-cased": 0.50,
|
586 |
-
"bge-base-zh": 0.41,
|
587 |
-
"bge-large-zh": 1.30,
|
588 |
"bge-large-zh-noinstruct": 1.30,
|
589 |
-
"bge-small-zh": 0.10,
|
590 |
"cross-en-de-roberta-sentence-transformer": 1.11,
|
591 |
"contriever-base-msmarco": 0.44,
|
592 |
"DanskBERT": 0.50,
|
@@ -675,6 +675,9 @@ MODELS_TO_SKIP = {
|
|
675 |
"kozistr/fused-large-en",
|
676 |
"sionic-ai/sionic-ai-v2", # Wait for https://huggingface.co/sionic-ai/sionic-ai-v2/discussions/1
|
677 |
"sionic-ai/sionic-ai-v1", # Wait for https://huggingface.co/sionic-ai/sionic-ai-v2/discussions/1
|
|
|
|
|
|
|
678 |
}
|
679 |
|
680 |
EXTERNAL_MODEL_RESULTS = {model: {k: {v: []} for k, v in TASK_TO_METRIC.items()} for model in EXTERNAL_MODELS}
|
|
|
279 |
"allenai-specter",
|
280 |
"bert-base-swedish-cased",
|
281 |
"bert-base-uncased",
|
282 |
+
"bge-base-zh-v1.5",
|
283 |
+
"bge-large-zh-v1.5",
|
284 |
"bge-large-zh-noinstruct",
|
285 |
+
"bge-small-zh-v1.5",
|
286 |
"contriever-base-msmarco",
|
287 |
"cross-en-de-roberta-sentence-transformer",
|
288 |
"dfm-encoder-large-v1",
|
|
|
355 |
"all-mpnet-base-v2": "https://huggingface.co/sentence-transformers/all-mpnet-base-v2",
|
356 |
"bert-base-swedish-cased": "https://huggingface.co/KB/bert-base-swedish-cased",
|
357 |
"bert-base-uncased": "https://huggingface.co/bert-base-uncased",
|
358 |
+
"bge-base-zh-v1.5": "https://huggingface.co/BAAI/bge-base-zh-v1.5",
|
359 |
+
"bge-large-zh-v1.5": "https://huggingface.co/BAAI/bge-large-zh-v1.5",
|
360 |
"bge-large-zh-noinstruct": "https://huggingface.co/BAAI/bge-large-zh-noinstruct",
|
361 |
+
"bge-small-zh-v1.5": "https://huggingface.co/BAAI/bge-small-zh-v1.5",
|
362 |
"contriever-base-msmarco": "https://huggingface.co/nthakur/contriever-base-msmarco",
|
363 |
"cross-en-de-roberta-sentence-transformer": "https://huggingface.co/T-Systems-onsite/cross-en-de-roberta-sentence-transformer",
|
364 |
"DanskBERT": "https://huggingface.co/vesteinn/DanskBERT",
|
|
|
431 |
"allenai-specter": 768,
|
432 |
"bert-base-swedish-cased": 768,
|
433 |
"bert-base-uncased": 768,
|
434 |
+
"bge-base-zh-v1.5": 768,
|
435 |
+
"bge-large-zh-v1.5": 1024,
|
436 |
"bge-large-zh-noinstruct": 1024,
|
437 |
+
"bge-small-zh-v1.5": 512,
|
438 |
"contriever-base-msmarco": 768,
|
439 |
"cross-en-de-roberta-sentence-transformer": 768,
|
440 |
"DanskBERT": 768,
|
|
|
507 |
"allenai-specter": 512,
|
508 |
"bert-base-swedish-cased": 512,
|
509 |
"bert-base-uncased": 512,
|
510 |
+
"bge-base-zh-v1.5": 512,
|
511 |
+
"bge-large-zh-v1.5": 512,
|
512 |
"bge-large-zh-noinstruct": 512,
|
513 |
+
"bge-small-zh-v1.5": 512,
|
514 |
"contriever-base-msmarco": 512,
|
515 |
"cross-en-de-roberta-sentence-transformer": 514,
|
516 |
"DanskBERT": 514,
|
|
|
583 |
"all-mpnet-base-v2": 0.44,
|
584 |
"bert-base-uncased": 0.44,
|
585 |
"bert-base-swedish-cased": 0.50,
|
586 |
+
"bge-base-zh-v1.5": 0.41,
|
587 |
+
"bge-large-zh-v1.5": 1.30,
|
588 |
"bge-large-zh-noinstruct": 1.30,
|
589 |
+
"bge-small-zh-v1.5": 0.10,
|
590 |
"cross-en-de-roberta-sentence-transformer": 1.11,
|
591 |
"contriever-base-msmarco": 0.44,
|
592 |
"DanskBERT": 0.50,
|
|
|
675 |
"kozistr/fused-large-en",
|
676 |
"sionic-ai/sionic-ai-v2", # Wait for https://huggingface.co/sionic-ai/sionic-ai-v2/discussions/1
|
677 |
"sionic-ai/sionic-ai-v1", # Wait for https://huggingface.co/sionic-ai/sionic-ai-v2/discussions/1
|
678 |
+
"BAAI/bge-large-en", # Deprecated in favor of v1.5
|
679 |
+
"BAAI/bge-base-en", # Deprecated in favor of v1.5
|
680 |
+
"BAAI/bge-small-en", # Deprecated in favor of v1.5
|
681 |
}
|
682 |
|
683 |
EXTERNAL_MODEL_RESULTS = {model: {k: {v: []} for k, v in TASK_TO_METRIC.items()} for model in EXTERNAL_MODELS}
|