Spaces:
Running
Running
orionweller
committed on
Commit
·
5012f81
1
Parent(s):
553c548
Automated Leaderboard Update
Browse files
- EXTERNAL_MODEL_RESULTS.json +194 -0
- all_data_tasks/0/default.jsonl +0 -0
- all_data_tasks/1/default.jsonl +0 -0
- all_data_tasks/10/default.jsonl +0 -0
- all_data_tasks/11/default.jsonl +0 -0
- all_data_tasks/12/default.jsonl +0 -0
- all_data_tasks/13/default.jsonl +0 -0
- all_data_tasks/2/default.jsonl +0 -0
- all_data_tasks/3/default.jsonl +0 -0
- all_data_tasks/4/default.jsonl +0 -0
- all_data_tasks/45/default.jsonl +0 -0
- all_data_tasks/5/default.jsonl +0 -0
- all_data_tasks/51/default.jsonl +0 -0
- all_data_tasks/6/default.jsonl +0 -0
- all_data_tasks/8/default.jsonl +0 -0
- all_data_tasks/9/default.jsonl +0 -0
- boards_data/coir/data_tasks/Retrieval/default.jsonl +0 -0
- boards_data/en/data_overall/default.jsonl +0 -0
- boards_data/en/data_tasks/Classification/default.jsonl +0 -0
- boards_data/en/data_tasks/Clustering/default.jsonl +0 -0
- boards_data/en/data_tasks/PairClassification/default.jsonl +0 -0
- boards_data/en/data_tasks/Reranking/default.jsonl +0 -0
- boards_data/en/data_tasks/Retrieval/default.jsonl +0 -0
- boards_data/en/data_tasks/STS/default.jsonl +0 -0
- boards_data/en/data_tasks/Summarization/default.jsonl +0 -0
- boards_data/other-sts/data_tasks/STS/default.jsonl +0 -0
- boards_data/zh/data_overall/default.jsonl +0 -0
- boards_data/zh/data_tasks/Classification/default.jsonl +0 -0
- boards_data/zh/data_tasks/Clustering/default.jsonl +0 -0
- boards_data/zh/data_tasks/PairClassification/default.jsonl +0 -0
- boards_data/zh/data_tasks/Reranking/default.jsonl +0 -0
- boards_data/zh/data_tasks/Retrieval/default.jsonl +0 -0
- boards_data/zh/data_tasks/STS/default.jsonl +0 -0
EXTERNAL_MODEL_RESULTS.json
CHANGED
@@ -45518,6 +45518,144 @@
|
|
45518 |
]
|
45519 |
}
|
45520 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45521 |
"voyageai__voyage-code-2": {
|
45522 |
"BitextMining": {
|
45523 |
"f1": []
|
@@ -45609,6 +45747,62 @@
|
|
45609 |
"p-MRR": []
|
45610 |
}
|
45611 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45612 |
"voyageai__voyage-large-2-instruct": {
|
45613 |
"BitextMining": {
|
45614 |
"f1": [
|
|
|
45518 |
]
|
45519 |
}
|
45520 |
},
|
45521 |
+
"voyageai__voyage-3-m-exp": {
|
45522 |
+
"BitextMining": {
|
45523 |
+
"f1": []
|
45524 |
+
},
|
45525 |
+
"Classification": {
|
45526 |
+
"accuracy": [
|
45527 |
+
{
|
45528 |
+
"Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://docs.voyageai.com/embeddings/\">voyage-3-m-exp</a>",
|
45529 |
+
"AmazonCounterfactualClassification (en)": 95.78,
|
45530 |
+
"AmazonPolarityClassification": 97.71,
|
45531 |
+
"AmazonReviewsClassification (en)": 63.62,
|
45532 |
+
"Banking77Classification": 93.8,
|
45533 |
+
"EmotionClassification": 64.86,
|
45534 |
+
"ImdbClassification": 97.29,
|
45535 |
+
"MTOPDomainClassification (en)": 99.95,
|
45536 |
+
"MTOPIntentClassification (en)": 91.85,
|
45537 |
+
"MassiveIntentClassification (en)": 91.94,
|
45538 |
+
"MassiveScenarioClassification (en)": 99.3,
|
45539 |
+
"ToxicConversationsClassification": 97.59,
|
45540 |
+
"TweetSentimentExtractionClassification": 88.23
|
45541 |
+
}
|
45542 |
+
]
|
45543 |
+
},
|
45544 |
+
"Clustering": {
|
45545 |
+
"v_measure": [
|
45546 |
+
{
|
45547 |
+
"Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://docs.voyageai.com/embeddings/\">voyage-3-m-exp</a>",
|
45548 |
+
"ArxivClusteringP2P": 57.14,
|
45549 |
+
"ArxivClusteringS2S": 52.64,
|
45550 |
+
"BiorxivClusteringP2P": 54.65,
|
45551 |
+
"BiorxivClusteringS2S": 49.95,
|
45552 |
+
"MedrxivClusteringP2P": 51.53,
|
45553 |
+
"MedrxivClusteringS2S": 48.99,
|
45554 |
+
"RedditClustering": 77.16,
|
45555 |
+
"RedditClusteringP2P": 70.23,
|
45556 |
+
"StackExchangeClustering": 82.03,
|
45557 |
+
"StackExchangeClusteringP2P": 48.17,
|
45558 |
+
"TwentyNewsgroupsClustering": 83.49
|
45559 |
+
}
|
45560 |
+
]
|
45561 |
+
},
|
45562 |
+
"PairClassification": {
|
45563 |
+
"max_ap": [
|
45564 |
+
{
|
45565 |
+
"Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://docs.voyageai.com/embeddings/\">voyage-3-m-exp</a>",
|
45566 |
+
"SprintDuplicateQuestions": 92.09,
|
45567 |
+
"TwitterSemEval2015": 79.18,
|
45568 |
+
"TwitterURLCorpus": 87.12
|
45569 |
+
},
|
45570 |
+
{
|
45571 |
+
"Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://docs.voyageai.com/embeddings/\">voyage-3-m-exp</a>",
|
45572 |
+
"SprintDuplicateQuestions": 92.09,
|
45573 |
+
"TwitterSemEval2015": 79.18,
|
45574 |
+
"TwitterURLCorpus": 87.12
|
45575 |
+
}
|
45576 |
+
]
|
45577 |
+
},
|
45578 |
+
"Reranking": {
|
45579 |
+
"map": [
|
45580 |
+
{
|
45581 |
+
"Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://docs.voyageai.com/embeddings/\">voyage-3-m-exp</a>",
|
45582 |
+
"AskUbuntuDupQuestions": 66.93,
|
45583 |
+
"MindSmallReranking": 29.68,
|
45584 |
+
"SciDocsRR": 86.72,
|
45585 |
+
"StackOverflowDupQuestions": 54.69
|
45586 |
+
}
|
45587 |
+
]
|
45588 |
+
},
|
45589 |
+
"Retrieval": {
|
45590 |
+
"ndcg_at_10": [
|
45591 |
+
{
|
45592 |
+
"Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://docs.voyageai.com/embeddings/\">voyage-3-m-exp</a>",
|
45593 |
+
"ArguAna": 89.79,
|
45594 |
+
"ClimateFEVER": 56.93,
|
45595 |
+
"DBPedia": 48.25,
|
45596 |
+
"FEVER": 96.28,
|
45597 |
+
"FiQA2018": 78.01,
|
45598 |
+
"HotpotQA": 86.96,
|
45599 |
+
"MSMARCO": 37.28,
|
45600 |
+
"NFCorpus": 46.99,
|
45601 |
+
"NQ": 82.48,
|
45602 |
+
"QuoraRetrieval": 88.86,
|
45603 |
+
"SCIDOCS": 34.53,
|
45604 |
+
"SciFact": 85.09,
|
45605 |
+
"TRECCOVID": 82.75,
|
45606 |
+
"Touche2020": 39.39
|
45607 |
+
}
|
45608 |
+
]
|
45609 |
+
},
|
45610 |
+
"STS": {
|
45611 |
+
"cosine_spearman": [
|
45612 |
+
{
|
45613 |
+
"Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://docs.voyageai.com/embeddings/\">voyage-3-m-exp</a>",
|
45614 |
+
"BIOSSES": 87.7,
|
45615 |
+
"SICK-R": 81.62,
|
45616 |
+
"STS12": 78.57,
|
45617 |
+
"STS13": 88.19,
|
45618 |
+
"STS14": 84.07,
|
45619 |
+
"STS15": 89.45,
|
45620 |
+
"STS16": 86.36,
|
45621 |
+
"STS17 (en-en)": 89.36,
|
45622 |
+
"STS22 (en)": 65.48,
|
45623 |
+
"STSBenchmark": 88.32
|
45624 |
+
},
|
45625 |
+
{
|
45626 |
+
"Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://docs.voyageai.com/embeddings/\">voyage-3-m-exp</a>",
|
45627 |
+
"BIOSSES": 87.7,
|
45628 |
+
"SICK-R": 81.62,
|
45629 |
+
"STS12": 78.57,
|
45630 |
+
"STS13": 88.19,
|
45631 |
+
"STS14": 84.07,
|
45632 |
+
"STS15": 89.45,
|
45633 |
+
"STS16": 86.36,
|
45634 |
+
"STS17 (en-en)": 89.36,
|
45635 |
+
"STS22 (en)": 65.48,
|
45636 |
+
"STSBenchmark": 88.32
|
45637 |
+
}
|
45638 |
+
]
|
45639 |
+
},
|
45640 |
+
"Summarization": {
|
45641 |
+
"cosine_spearman": [
|
45642 |
+
{
|
45643 |
+
"Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://docs.voyageai.com/embeddings/\">voyage-3-m-exp</a>",
|
45644 |
+
"SummEval": 30.45
|
45645 |
+
},
|
45646 |
+
{
|
45647 |
+
"Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://docs.voyageai.com/embeddings/\">voyage-3-m-exp</a>",
|
45648 |
+
"SummEval": 30.45
|
45649 |
+
}
|
45650 |
+
]
|
45651 |
+
},
|
45652 |
+
"MultilabelClassification": {
|
45653 |
+
"accuracy": []
|
45654 |
+
},
|
45655 |
+
"InstructionRetrieval": {
|
45656 |
+
"p-MRR": []
|
45657 |
+
}
|
45658 |
+
},
|
45659 |
"voyageai__voyage-code-2": {
|
45660 |
"BitextMining": {
|
45661 |
"f1": []
|
|
|
45747 |
"p-MRR": []
|
45748 |
}
|
45749 |
},
|
45750 |
+
"voyageai__voyage-code-3": {
|
45751 |
+
"BitextMining": {
|
45752 |
+
"f1": []
|
45753 |
+
},
|
45754 |
+
"Classification": {
|
45755 |
+
"accuracy": []
|
45756 |
+
},
|
45757 |
+
"Clustering": {
|
45758 |
+
"v_measure": []
|
45759 |
+
},
|
45760 |
+
"PairClassification": {
|
45761 |
+
"max_ap": []
|
45762 |
+
},
|
45763 |
+
"Reranking": {
|
45764 |
+
"map": []
|
45765 |
+
},
|
45766 |
+
"Retrieval": {
|
45767 |
+
"ndcg_at_10": [
|
45768 |
+
{
|
45769 |
+
"Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://docs.voyageai.com/embeddings/\">voyage-code-3</a>",
|
45770 |
+
"AppsRetrieval": 93.62,
|
45771 |
+
"CodeFeedbackMT": 93.58,
|
45772 |
+
"CodeFeedbackST": 90.67,
|
45773 |
+
"CodeSearchNetCCRetrieval (python)": 94.43,
|
45774 |
+
"CodeSearchNetCCRetrieval (javascript)": 91.53,
|
45775 |
+
"CodeSearchNetCCRetrieval (go)": 91.76,
|
45776 |
+
"CodeSearchNetCCRetrieval (ruby)": 89.26,
|
45777 |
+
"CodeSearchNetCCRetrieval (java)": 90.19,
|
45778 |
+
"CodeSearchNetCCRetrieval (php)": 83.39,
|
45779 |
+
"CodeSearchNetRetrieval (python)": 96.69,
|
45780 |
+
"CodeSearchNetRetrieval (javascript)": 89.98,
|
45781 |
+
"CodeSearchNetRetrieval (go)": 97.5,
|
45782 |
+
"CodeSearchNetRetrieval (ruby)": 92.83,
|
45783 |
+
"CodeSearchNetRetrieval (java)": 94.51,
|
45784 |
+
"CodeSearchNetRetrieval (php)": 92.29,
|
45785 |
+
"CodeTransOceanContest": 94.96,
|
45786 |
+
"CodeTransOceanDL": 38.57,
|
45787 |
+
"CosQA": 34.45,
|
45788 |
+
"StackOverflowQA": 97.17,
|
45789 |
+
"SyntheticText2SQL": 62.87
|
45790 |
+
}
|
45791 |
+
]
|
45792 |
+
},
|
45793 |
+
"STS": {
|
45794 |
+
"cosine_spearman": []
|
45795 |
+
},
|
45796 |
+
"Summarization": {
|
45797 |
+
"cosine_spearman": []
|
45798 |
+
},
|
45799 |
+
"MultilabelClassification": {
|
45800 |
+
"accuracy": []
|
45801 |
+
},
|
45802 |
+
"InstructionRetrieval": {
|
45803 |
+
"p-MRR": []
|
45804 |
+
}
|
45805 |
+
},
|
45806 |
"voyageai__voyage-large-2-instruct": {
|
45807 |
"BitextMining": {
|
45808 |
"f1": [
|
all_data_tasks/0/default.jsonl
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
all_data_tasks/1/default.jsonl
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
all_data_tasks/10/default.jsonl
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
all_data_tasks/11/default.jsonl
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
all_data_tasks/12/default.jsonl
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
all_data_tasks/13/default.jsonl
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
all_data_tasks/2/default.jsonl
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
all_data_tasks/3/default.jsonl
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
all_data_tasks/4/default.jsonl
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
all_data_tasks/45/default.jsonl
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
all_data_tasks/5/default.jsonl
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
all_data_tasks/51/default.jsonl
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
all_data_tasks/6/default.jsonl
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
all_data_tasks/8/default.jsonl
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
all_data_tasks/9/default.jsonl
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
boards_data/coir/data_tasks/Retrieval/default.jsonl
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
boards_data/en/data_overall/default.jsonl
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
boards_data/en/data_tasks/Classification/default.jsonl
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
boards_data/en/data_tasks/Clustering/default.jsonl
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
boards_data/en/data_tasks/PairClassification/default.jsonl
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
boards_data/en/data_tasks/Reranking/default.jsonl
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
boards_data/en/data_tasks/Retrieval/default.jsonl
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
boards_data/en/data_tasks/STS/default.jsonl
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
boards_data/en/data_tasks/Summarization/default.jsonl
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
boards_data/other-sts/data_tasks/STS/default.jsonl
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
boards_data/zh/data_overall/default.jsonl
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
boards_data/zh/data_tasks/Classification/default.jsonl
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
boards_data/zh/data_tasks/Clustering/default.jsonl
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
boards_data/zh/data_tasks/PairClassification/default.jsonl
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
boards_data/zh/data_tasks/Reranking/default.jsonl
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
boards_data/zh/data_tasks/Retrieval/default.jsonl
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
boards_data/zh/data_tasks/STS/default.jsonl
CHANGED
The diff for this file is too large to render.
See raw diff
|
|