orionweller committed on
Commit
5012f81
·
1 Parent(s): 553c548

Automated Leaderboard Update

Browse files
Files changed (33) hide show
  1. EXTERNAL_MODEL_RESULTS.json +194 -0
  2. all_data_tasks/0/default.jsonl +0 -0
  3. all_data_tasks/1/default.jsonl +0 -0
  4. all_data_tasks/10/default.jsonl +0 -0
  5. all_data_tasks/11/default.jsonl +0 -0
  6. all_data_tasks/12/default.jsonl +0 -0
  7. all_data_tasks/13/default.jsonl +0 -0
  8. all_data_tasks/2/default.jsonl +0 -0
  9. all_data_tasks/3/default.jsonl +0 -0
  10. all_data_tasks/4/default.jsonl +0 -0
  11. all_data_tasks/45/default.jsonl +0 -0
  12. all_data_tasks/5/default.jsonl +0 -0
  13. all_data_tasks/51/default.jsonl +0 -0
  14. all_data_tasks/6/default.jsonl +0 -0
  15. all_data_tasks/8/default.jsonl +0 -0
  16. all_data_tasks/9/default.jsonl +0 -0
  17. boards_data/coir/data_tasks/Retrieval/default.jsonl +0 -0
  18. boards_data/en/data_overall/default.jsonl +0 -0
  19. boards_data/en/data_tasks/Classification/default.jsonl +0 -0
  20. boards_data/en/data_tasks/Clustering/default.jsonl +0 -0
  21. boards_data/en/data_tasks/PairClassification/default.jsonl +0 -0
  22. boards_data/en/data_tasks/Reranking/default.jsonl +0 -0
  23. boards_data/en/data_tasks/Retrieval/default.jsonl +0 -0
  24. boards_data/en/data_tasks/STS/default.jsonl +0 -0
  25. boards_data/en/data_tasks/Summarization/default.jsonl +0 -0
  26. boards_data/other-sts/data_tasks/STS/default.jsonl +0 -0
  27. boards_data/zh/data_overall/default.jsonl +0 -0
  28. boards_data/zh/data_tasks/Classification/default.jsonl +0 -0
  29. boards_data/zh/data_tasks/Clustering/default.jsonl +0 -0
  30. boards_data/zh/data_tasks/PairClassification/default.jsonl +0 -0
  31. boards_data/zh/data_tasks/Reranking/default.jsonl +0 -0
  32. boards_data/zh/data_tasks/Retrieval/default.jsonl +0 -0
  33. boards_data/zh/data_tasks/STS/default.jsonl +0 -0
EXTERNAL_MODEL_RESULTS.json CHANGED
@@ -45518,6 +45518,144 @@
45518
  ]
45519
  }
45520
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45521
  "voyageai__voyage-code-2": {
45522
  "BitextMining": {
45523
  "f1": []
@@ -45609,6 +45747,62 @@
45609
  "p-MRR": []
45610
  }
45611
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45612
  "voyageai__voyage-large-2-instruct": {
45613
  "BitextMining": {
45614
  "f1": [
 
45518
  ]
45519
  }
45520
  },
45521
+ "voyageai__voyage-3-m-exp": {
45522
+ "BitextMining": {
45523
+ "f1": []
45524
+ },
45525
+ "Classification": {
45526
+ "accuracy": [
45527
+ {
45528
+ "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://docs.voyageai.com/embeddings/\">voyage-3-m-exp</a>",
45529
+ "AmazonCounterfactualClassification (en)": 95.78,
45530
+ "AmazonPolarityClassification": 97.71,
45531
+ "AmazonReviewsClassification (en)": 63.62,
45532
+ "Banking77Classification": 93.8,
45533
+ "EmotionClassification": 64.86,
45534
+ "ImdbClassification": 97.29,
45535
+ "MTOPDomainClassification (en)": 99.95,
45536
+ "MTOPIntentClassification (en)": 91.85,
45537
+ "MassiveIntentClassification (en)": 91.94,
45538
+ "MassiveScenarioClassification (en)": 99.3,
45539
+ "ToxicConversationsClassification": 97.59,
45540
+ "TweetSentimentExtractionClassification": 88.23
45541
+ }
45542
+ ]
45543
+ },
45544
+ "Clustering": {
45545
+ "v_measure": [
45546
+ {
45547
+ "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://docs.voyageai.com/embeddings/\">voyage-3-m-exp</a>",
45548
+ "ArxivClusteringP2P": 57.14,
45549
+ "ArxivClusteringS2S": 52.64,
45550
+ "BiorxivClusteringP2P": 54.65,
45551
+ "BiorxivClusteringS2S": 49.95,
45552
+ "MedrxivClusteringP2P": 51.53,
45553
+ "MedrxivClusteringS2S": 48.99,
45554
+ "RedditClustering": 77.16,
45555
+ "RedditClusteringP2P": 70.23,
45556
+ "StackExchangeClustering": 82.03,
45557
+ "StackExchangeClusteringP2P": 48.17,
45558
+ "TwentyNewsgroupsClustering": 83.49
45559
+ }
45560
+ ]
45561
+ },
45562
+ "PairClassification": {
45563
+ "max_ap": [
45564
+ {
45565
+ "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://docs.voyageai.com/embeddings/\">voyage-3-m-exp</a>",
45566
+ "SprintDuplicateQuestions": 92.09,
45567
+ "TwitterSemEval2015": 79.18,
45568
+ "TwitterURLCorpus": 87.12
45569
+ },
45570
+ {
45571
+ "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://docs.voyageai.com/embeddings/\">voyage-3-m-exp</a>",
45572
+ "SprintDuplicateQuestions": 92.09,
45573
+ "TwitterSemEval2015": 79.18,
45574
+ "TwitterURLCorpus": 87.12
45575
+ }
45576
+ ]
45577
+ },
45578
+ "Reranking": {
45579
+ "map": [
45580
+ {
45581
+ "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://docs.voyageai.com/embeddings/\">voyage-3-m-exp</a>",
45582
+ "AskUbuntuDupQuestions": 66.93,
45583
+ "MindSmallReranking": 29.68,
45584
+ "SciDocsRR": 86.72,
45585
+ "StackOverflowDupQuestions": 54.69
45586
+ }
45587
+ ]
45588
+ },
45589
+ "Retrieval": {
45590
+ "ndcg_at_10": [
45591
+ {
45592
+ "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://docs.voyageai.com/embeddings/\">voyage-3-m-exp</a>",
45593
+ "ArguAna": 89.79,
45594
+ "ClimateFEVER": 56.93,
45595
+ "DBPedia": 48.25,
45596
+ "FEVER": 96.28,
45597
+ "FiQA2018": 78.01,
45598
+ "HotpotQA": 86.96,
45599
+ "MSMARCO": 37.28,
45600
+ "NFCorpus": 46.99,
45601
+ "NQ": 82.48,
45602
+ "QuoraRetrieval": 88.86,
45603
+ "SCIDOCS": 34.53,
45604
+ "SciFact": 85.09,
45605
+ "TRECCOVID": 82.75,
45606
+ "Touche2020": 39.39
45607
+ }
45608
+ ]
45609
+ },
45610
+ "STS": {
45611
+ "cosine_spearman": [
45612
+ {
45613
+ "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://docs.voyageai.com/embeddings/\">voyage-3-m-exp</a>",
45614
+ "BIOSSES": 87.7,
45615
+ "SICK-R": 81.62,
45616
+ "STS12": 78.57,
45617
+ "STS13": 88.19,
45618
+ "STS14": 84.07,
45619
+ "STS15": 89.45,
45620
+ "STS16": 86.36,
45621
+ "STS17 (en-en)": 89.36,
45622
+ "STS22 (en)": 65.48,
45623
+ "STSBenchmark": 88.32
45624
+ },
45625
+ {
45626
+ "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://docs.voyageai.com/embeddings/\">voyage-3-m-exp</a>",
45627
+ "BIOSSES": 87.7,
45628
+ "SICK-R": 81.62,
45629
+ "STS12": 78.57,
45630
+ "STS13": 88.19,
45631
+ "STS14": 84.07,
45632
+ "STS15": 89.45,
45633
+ "STS16": 86.36,
45634
+ "STS17 (en-en)": 89.36,
45635
+ "STS22 (en)": 65.48,
45636
+ "STSBenchmark": 88.32
45637
+ }
45638
+ ]
45639
+ },
45640
+ "Summarization": {
45641
+ "cosine_spearman": [
45642
+ {
45643
+ "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://docs.voyageai.com/embeddings/\">voyage-3-m-exp</a>",
45644
+ "SummEval": 30.45
45645
+ },
45646
+ {
45647
+ "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://docs.voyageai.com/embeddings/\">voyage-3-m-exp</a>",
45648
+ "SummEval": 30.45
45649
+ }
45650
+ ]
45651
+ },
45652
+ "MultilabelClassification": {
45653
+ "accuracy": []
45654
+ },
45655
+ "InstructionRetrieval": {
45656
+ "p-MRR": []
45657
+ }
45658
+ },
45659
  "voyageai__voyage-code-2": {
45660
  "BitextMining": {
45661
  "f1": []
 
45747
  "p-MRR": []
45748
  }
45749
  },
45750
+ "voyageai__voyage-code-3": {
45751
+ "BitextMining": {
45752
+ "f1": []
45753
+ },
45754
+ "Classification": {
45755
+ "accuracy": []
45756
+ },
45757
+ "Clustering": {
45758
+ "v_measure": []
45759
+ },
45760
+ "PairClassification": {
45761
+ "max_ap": []
45762
+ },
45763
+ "Reranking": {
45764
+ "map": []
45765
+ },
45766
+ "Retrieval": {
45767
+ "ndcg_at_10": [
45768
+ {
45769
+ "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://docs.voyageai.com/embeddings/\">voyage-code-3</a>",
45770
+ "AppsRetrieval": 93.62,
45771
+ "CodeFeedbackMT": 93.58,
45772
+ "CodeFeedbackST": 90.67,
45773
+ "CodeSearchNetCCRetrieval (python)": 94.43,
45774
+ "CodeSearchNetCCRetrieval (javascript)": 91.53,
45775
+ "CodeSearchNetCCRetrieval (go)": 91.76,
45776
+ "CodeSearchNetCCRetrieval (ruby)": 89.26,
45777
+ "CodeSearchNetCCRetrieval (java)": 90.19,
45778
+ "CodeSearchNetCCRetrieval (php)": 83.39,
45779
+ "CodeSearchNetRetrieval (python)": 96.69,
45780
+ "CodeSearchNetRetrieval (javascript)": 89.98,
45781
+ "CodeSearchNetRetrieval (go)": 97.5,
45782
+ "CodeSearchNetRetrieval (ruby)": 92.83,
45783
+ "CodeSearchNetRetrieval (java)": 94.51,
45784
+ "CodeSearchNetRetrieval (php)": 92.29,
45785
+ "CodeTransOceanContest": 94.96,
45786
+ "CodeTransOceanDL": 38.57,
45787
+ "CosQA": 34.45,
45788
+ "StackOverflowQA": 97.17,
45789
+ "SyntheticText2SQL": 62.87
45790
+ }
45791
+ ]
45792
+ },
45793
+ "STS": {
45794
+ "cosine_spearman": []
45795
+ },
45796
+ "Summarization": {
45797
+ "cosine_spearman": []
45798
+ },
45799
+ "MultilabelClassification": {
45800
+ "accuracy": []
45801
+ },
45802
+ "InstructionRetrieval": {
45803
+ "p-MRR": []
45804
+ }
45805
+ },
45806
  "voyageai__voyage-large-2-instruct": {
45807
  "BitextMining": {
45808
  "f1": [
all_data_tasks/0/default.jsonl CHANGED
The diff for this file is too large to render. See raw diff
 
all_data_tasks/1/default.jsonl CHANGED
The diff for this file is too large to render. See raw diff
 
all_data_tasks/10/default.jsonl CHANGED
The diff for this file is too large to render. See raw diff
 
all_data_tasks/11/default.jsonl CHANGED
The diff for this file is too large to render. See raw diff
 
all_data_tasks/12/default.jsonl CHANGED
The diff for this file is too large to render. See raw diff
 
all_data_tasks/13/default.jsonl CHANGED
The diff for this file is too large to render. See raw diff
 
all_data_tasks/2/default.jsonl CHANGED
The diff for this file is too large to render. See raw diff
 
all_data_tasks/3/default.jsonl CHANGED
The diff for this file is too large to render. See raw diff
 
all_data_tasks/4/default.jsonl CHANGED
The diff for this file is too large to render. See raw diff
 
all_data_tasks/45/default.jsonl CHANGED
The diff for this file is too large to render. See raw diff
 
all_data_tasks/5/default.jsonl CHANGED
The diff for this file is too large to render. See raw diff
 
all_data_tasks/51/default.jsonl CHANGED
The diff for this file is too large to render. See raw diff
 
all_data_tasks/6/default.jsonl CHANGED
The diff for this file is too large to render. See raw diff
 
all_data_tasks/8/default.jsonl CHANGED
The diff for this file is too large to render. See raw diff
 
all_data_tasks/9/default.jsonl CHANGED
The diff for this file is too large to render. See raw diff
 
boards_data/coir/data_tasks/Retrieval/default.jsonl CHANGED
The diff for this file is too large to render. See raw diff
 
boards_data/en/data_overall/default.jsonl CHANGED
The diff for this file is too large to render. See raw diff
 
boards_data/en/data_tasks/Classification/default.jsonl CHANGED
The diff for this file is too large to render. See raw diff
 
boards_data/en/data_tasks/Clustering/default.jsonl CHANGED
The diff for this file is too large to render. See raw diff
 
boards_data/en/data_tasks/PairClassification/default.jsonl CHANGED
The diff for this file is too large to render. See raw diff
 
boards_data/en/data_tasks/Reranking/default.jsonl CHANGED
The diff for this file is too large to render. See raw diff
 
boards_data/en/data_tasks/Retrieval/default.jsonl CHANGED
The diff for this file is too large to render. See raw diff
 
boards_data/en/data_tasks/STS/default.jsonl CHANGED
The diff for this file is too large to render. See raw diff
 
boards_data/en/data_tasks/Summarization/default.jsonl CHANGED
The diff for this file is too large to render. See raw diff
 
boards_data/other-sts/data_tasks/STS/default.jsonl CHANGED
The diff for this file is too large to render. See raw diff
 
boards_data/zh/data_overall/default.jsonl CHANGED
The diff for this file is too large to render. See raw diff
 
boards_data/zh/data_tasks/Classification/default.jsonl CHANGED
The diff for this file is too large to render. See raw diff
 
boards_data/zh/data_tasks/Clustering/default.jsonl CHANGED
The diff for this file is too large to render. See raw diff
 
boards_data/zh/data_tasks/PairClassification/default.jsonl CHANGED
The diff for this file is too large to render. See raw diff
 
boards_data/zh/data_tasks/Reranking/default.jsonl CHANGED
The diff for this file is too large to render. See raw diff
 
boards_data/zh/data_tasks/Retrieval/default.jsonl CHANGED
The diff for this file is too large to render. See raw diff
 
boards_data/zh/data_tasks/STS/default.jsonl CHANGED
The diff for this file is too large to render. See raw diff