[init] model

Browse files

Files changed (8) hide show

README.md +1056 -1
added_tokens.json +7 -0
config.json +35 -0
pytorch_model.bin +3 -0
special_tokens_map.json +14 -0
tokenizer.json +0 -0
tokenizer_config.json +62 -0
vocab.txt +0 -0

README.md CHANGED Viewed

@@ -1,3 +1,1058 @@
 ---
-license: apache-2.0
 ---

 ---
+tags:
+- mteb
+model-index:
+- name: tao-8k
+  results:
+  - task:
+      type: STS
+    dataset:
+      type: C-MTEB/AFQMC
+      name: MTEB AFQMC
+      config: default
+      split: validation
+      revision: None
+    metrics:
+    - type: cos_sim_pearson
+      value: 46.6327281304144
+    - type: cos_sim_spearman
+      value: 48.842454434123376
+    - type: euclidean_pearson
+      value: 46.94481399008005
+    - type: euclidean_spearman
+      value: 48.842454434123376
+    - type: manhattan_pearson
+      value: 46.89375935801324
+    - type: manhattan_spearman
+      value: 48.78990181105918
+  - task:
+      type: STS
+    dataset:
+      type: C-MTEB/ATEC
+      name: MTEB ATEC
+      config: default
+      split: test
+      revision: None
+    metrics:
+    - type: cos_sim_pearson
+      value: 51.29442837260785
+    - type: cos_sim_spearman
+      value: 52.652094634834
+    - type: euclidean_pearson
+      value: 54.86278112546793
+    - type: euclidean_spearman
+      value: 52.65209238258423
+    - type: manhattan_pearson
+      value: 54.8164800665497
+    - type: manhattan_spearman
+      value: 52.626711935726014
+  - task:
+      type: Classification
+    dataset:
+      type: mteb/amazon_reviews_multi
+      name: MTEB AmazonReviewsClassification (zh)
+      config: zh
+      split: test
+      revision: 1399c76144fd37290681b995c656ef9b2e06e26d
+    metrics:
+    - type: accuracy
+      value: 41.51200000000001
+    - type: f1
+      value: 39.47955832883091
+  - task:
+      type: STS
+    dataset:
+      type: C-MTEB/BQ
+      name: MTEB BQ
+      config: default
+      split: test
+      revision: None
+    metrics:
+    - type: cos_sim_pearson
+      value: 63.27653562193512
+    - type: cos_sim_spearman
+      value: 65.37293598647585
+    - type: euclidean_pearson
+      value: 63.91367659963474
+    - type: euclidean_spearman
+      value: 65.37294637878077
+    - type: manhattan_pearson
+      value: 63.89671277983551
+    - type: manhattan_spearman
+      value: 65.35510625635355
+  - task:
+      type: Clustering
+    dataset:
+      type: C-MTEB/CLSClusteringP2P
+      name: MTEB CLSClusteringP2P
+      config: default
+      split: test
+      revision: None
+    metrics:
+    - type: v_measure
+      value: 39.92148459596857
+  - task:
+      type: Clustering
+    dataset:
+      type: C-MTEB/CLSClusteringS2S
+      name: MTEB CLSClusteringS2S
+      config: default
+      split: test
+      revision: None
+    metrics:
+    - type: v_measure
+      value: 36.7800929733979
+  - task:
+      type: Reranking
+    dataset:
+      type: C-MTEB/CMedQAv1-reranking
+      name: MTEB CMedQAv1
+      config: default
+      split: test
+      revision: None
+    metrics:
+    - type: map
+      value: 84.56370955233704
+    - type: mrr
+      value: 87.14396825396825
+  - task:
+      type: Reranking
+    dataset:
+      type: C-MTEB/CMedQAv2-reranking
+      name: MTEB CMedQAv2
+      config: default
+      split: test
+      revision: None
+    metrics:
+    - type: map
+      value: 85.4719112626303
+    - type: mrr
+      value: 88.25107142857142
+  - task:
+      type: Retrieval
+    dataset:
+      type: C-MTEB/CmedqaRetrieval
+      name: MTEB CmedqaRetrieval
+      config: default
+      split: dev
+      revision: None
+    metrics:
+    - type: map_at_1
+      value: 24.314
+    - type: map_at_10
+      value: 36.157000000000004
+    - type: map_at_100
+      value: 38.004
+    - type: map_at_1000
+      value: 38.129999999999995
+    - type: map_at_3
+      value: 32.141999999999996
+    - type: map_at_5
+      value: 34.414
+    - type: mrr_at_1
+      value: 37.384
+    - type: mrr_at_10
+      value: 45.261
+    - type: mrr_at_100
+      value: 46.271
+    - type: mrr_at_1000
+      value: 46.32
+    - type: mrr_at_3
+      value: 42.760999999999996
+    - type: mrr_at_5
+      value: 44.219
+    - type: ndcg_at_1
+      value: 37.384
+    - type: ndcg_at_10
+      value: 42.599
+    - type: ndcg_at_100
+      value: 50.068999999999996
+    - type: ndcg_at_1000
+      value: 52.221
+    - type: ndcg_at_3
+      value: 37.551
+    - type: ndcg_at_5
+      value: 39.711
+    - type: precision_at_1
+      value: 37.384
+    - type: precision_at_10
+      value: 9.532
+    - type: precision_at_100
+      value: 1.554
+    - type: precision_at_1000
+      value: 0.183
+    - type: precision_at_3
+      value: 21.205
+    - type: precision_at_5
+      value: 15.539
+    - type: recall_at_1
+      value: 24.314
+    - type: recall_at_10
+      value: 52.463
+    - type: recall_at_100
+      value: 83.86099999999999
+    - type: recall_at_1000
+      value: 98.17399999999999
+    - type: recall_at_3
+      value: 37.341
+    - type: recall_at_5
+      value: 43.952999999999996
+  - task:
+      type: PairClassification
+    dataset:
+      type: C-MTEB/CMNLI
+      name: MTEB Cmnli
+      config: default
+      split: validation
+      revision: None
+    metrics:
+    - type: cos_sim_accuracy
+      value: 78.80938063740228
+    - type: cos_sim_ap
+      value: 87.42519095434638
+    - type: cos_sim_f1
+      value: 80.08597528210638
+    - type: cos_sim_precision
+      value: 74.10501193317423
+    - type: cos_sim_recall
+      value: 87.11713818096797
+    - type: dot_accuracy
+      value: 78.80938063740228
+    - type: dot_ap
+      value: 87.44023261310717
+    - type: dot_f1
+      value: 80.08597528210638
+    - type: dot_precision
+      value: 74.10501193317423
+    - type: dot_recall
+      value: 87.11713818096797
+    - type: euclidean_accuracy
+      value: 78.80938063740228
+    - type: euclidean_ap
+      value: 87.42517285949802
+    - type: euclidean_f1
+      value: 80.08597528210638
+    - type: euclidean_precision
+      value: 74.10501193317423
+    - type: euclidean_recall
+      value: 87.11713818096797
+    - type: manhattan_accuracy
+      value: 78.90559230306675
+    - type: manhattan_ap
+      value: 87.38730802838026
+    - type: manhattan_f1
+      value: 80.1043138107139
+    - type: manhattan_precision
+      value: 74.82744620381648
+    - type: manhattan_recall
+      value: 86.1819032031798
+    - type: max_accuracy
+      value: 78.90559230306675
+    - type: max_ap
+      value: 87.44023261310717
+    - type: max_f1
+      value: 80.1043138107139
+  - task:
+      type: Retrieval
+    dataset:
+      type: C-MTEB/CovidRetrieval
+      name: MTEB CovidRetrieval
+      config: default
+      split: dev
+      revision: None
+    metrics:
+    - type: map_at_1
+      value: 69.863
+    - type: map_at_10
+      value: 77.865
+    - type: map_at_100
+      value: 78.21900000000001
+    - type: map_at_1000
+      value: 78.22200000000001
+    - type: map_at_3
+      value: 76.335
+    - type: map_at_5
+      value: 77.179
+    - type: mrr_at_1
+      value: 70.074
+    - type: mrr_at_10
+      value: 77.89
+    - type: mrr_at_100
+      value: 78.235
+    - type: mrr_at_1000
+      value: 78.238
+    - type: mrr_at_3
+      value: 76.466
+    - type: mrr_at_5
+      value: 77.241
+    - type: ndcg_at_1
+      value: 70.074
+    - type: ndcg_at_10
+      value: 81.375
+    - type: ndcg_at_100
+      value: 82.918
+    - type: ndcg_at_1000
+      value: 83.019
+    - type: ndcg_at_3
+      value: 78.32000000000001
+    - type: ndcg_at_5
+      value: 79.824
+    - type: precision_at_1
+      value: 70.074
+    - type: precision_at_10
+      value: 9.325999999999999
+    - type: precision_at_100
+      value: 1.001
+    - type: precision_at_1000
+      value: 0.101
+    - type: precision_at_3
+      value: 28.17
+    - type: precision_at_5
+      value: 17.682000000000002
+    - type: recall_at_1
+      value: 69.863
+    - type: recall_at_10
+      value: 92.202
+    - type: recall_at_100
+      value: 99.05199999999999
+    - type: recall_at_1000
+      value: 99.895
+    - type: recall_at_3
+      value: 83.93
+    - type: recall_at_5
+      value: 87.566
+  - task:
+      type: Retrieval
+    dataset:
+      type: C-MTEB/DuRetrieval
+      name: MTEB DuRetrieval
+      config: default
+      split: dev
+      revision: None
+    metrics:
+    - type: map_at_1
+      value: 25.730999999999998
+    - type: map_at_10
+      value: 80.765
+    - type: map_at_100
+      value: 83.486
+    - type: map_at_1000
+      value: 83.521
+    - type: map_at_3
+      value: 55.745999999999995
+    - type: map_at_5
+      value: 70.473
+    - type: mrr_at_1
+      value: 89.55
+    - type: mrr_at_10
+      value: 93.028
+    - type: mrr_at_100
+      value: 93.093
+    - type: mrr_at_1000
+      value: 93.096
+    - type: mrr_at_3
+      value: 92.80000000000001
+    - type: mrr_at_5
+      value: 92.92200000000001
+    - type: ndcg_at_1
+      value: 89.55
+    - type: ndcg_at_10
+      value: 87.898
+    - type: ndcg_at_100
+      value: 90.366
+    - type: ndcg_at_1000
+      value: 90.715
+    - type: ndcg_at_3
+      value: 86.497
+    - type: ndcg_at_5
+      value: 85.533
+    - type: precision_at_1
+      value: 89.55
+    - type: precision_at_10
+      value: 42.305
+    - type: precision_at_100
+      value: 4.82
+    - type: precision_at_1000
+      value: 0.48900000000000005
+    - type: precision_at_3
+      value: 77.833
+    - type: precision_at_5
+      value: 65.81
+    - type: recall_at_1
+      value: 25.730999999999998
+    - type: recall_at_10
+      value: 89.409
+    - type: recall_at_100
+      value: 97.62100000000001
+    - type: recall_at_1000
+      value: 99.565
+    - type: recall_at_3
+      value: 58.298
+    - type: recall_at_5
+      value: 75.315
+  - task:
+      type: Retrieval
+    dataset:
+      type: C-MTEB/EcomRetrieval
+      name: MTEB EcomRetrieval
+      config: default
+      split: dev
+      revision: None
+    metrics:
+    - type: map_at_1
+      value: 49.6
+    - type: map_at_10
+      value: 59.34
+    - type: map_at_100
+      value: 59.894999999999996
+    - type: map_at_1000
+      value: 59.913000000000004
+    - type: map_at_3
+      value: 56.667
+    - type: map_at_5
+      value: 58.196999999999996
+    - type: mrr_at_1
+      value: 49.6
+    - type: mrr_at_10
+      value: 59.34
+    - type: mrr_at_100
+      value: 59.894999999999996
+    - type: mrr_at_1000
+      value: 59.913000000000004
+    - type: mrr_at_3
+      value: 56.667
+    - type: mrr_at_5
+      value: 58.196999999999996
+    - type: ndcg_at_1
+      value: 49.6
+    - type: ndcg_at_10
+      value: 64.461
+    - type: ndcg_at_100
+      value: 67.08800000000001
+    - type: ndcg_at_1000
+      value: 67.578
+    - type: ndcg_at_3
+      value: 58.962
+    - type: ndcg_at_5
+      value: 61.741
+    - type: precision_at_1
+      value: 49.6
+    - type: precision_at_10
+      value: 8.07
+    - type: precision_at_100
+      value: 0.928
+    - type: precision_at_1000
+      value: 0.097
+    - type: precision_at_3
+      value: 21.867
+    - type: precision_at_5
+      value: 14.48
+    - type: recall_at_1
+      value: 49.6
+    - type: recall_at_10
+      value: 80.7
+    - type: recall_at_100
+      value: 92.80000000000001
+    - type: recall_at_1000
+      value: 96.7
+    - type: recall_at_3
+      value: 65.60000000000001
+    - type: recall_at_5
+      value: 72.39999999999999
+  - task:
+      type: Classification
+    dataset:
+      type: C-MTEB/IFlyTek-classification
+      name: MTEB IFlyTek
+      config: default
+      split: validation
+      revision: None
+    metrics:
+    - type: accuracy
+      value: 47.44132358599462
+    - type: f1
+      value: 34.814352930577854
+  - task:
+      type: Classification
+    dataset:
+      type: C-MTEB/JDReview-classification
+      name: MTEB JDReview
+      config: default
+      split: test
+      revision: None
+    metrics:
+    - type: accuracy
+      value: 86.43527204502813
+    - type: ap
+      value: 55.197728692877554
+    - type: f1
+      value: 81.22331922899193
+  - task:
+      type: STS
+    dataset:
+      type: C-MTEB/LCQMC
+      name: MTEB LCQMC
+      config: default
+      split: test
+      revision: None
+    metrics:
+    - type: cos_sim_pearson
+      value: 72.21054197899034
+    - type: cos_sim_spearman
+      value: 77.10172371889475
+    - type: euclidean_pearson
+      value: 76.15914782847307
+    - type: euclidean_spearman
+      value: 77.10173036795658
+    - type: manhattan_pearson
+      value: 76.16257390318928
+    - type: manhattan_spearman
+      value: 77.10538180843567
+  - task:
+      type: Reranking
+    dataset:
+      type: C-MTEB/Mmarco-reranking
+      name: MTEB MMarcoReranking
+      config: default
+      split: dev
+      revision: None
+    metrics:
+    - type: map
+      value: 26.968179320629726
+    - type: mrr
+      value: 25.664285714285718
+  - task:
+      type: Retrieval
+    dataset:
+      type: C-MTEB/MMarcoRetrieval
+      name: MTEB MMarcoRetrieval
+      config: default
+      split: dev
+      revision: None
+    metrics:
+    - type: map_at_1
+      value: 66.674
+    - type: map_at_10
+      value: 75.624
+    - type: map_at_100
+      value: 75.96199999999999
+    - type: map_at_1000
+      value: 75.973
+    - type: map_at_3
+      value: 73.9
+    - type: map_at_5
+      value: 75.007
+    - type: mrr_at_1
+      value: 68.89699999999999
+    - type: mrr_at_10
+      value: 76.212
+    - type: mrr_at_100
+      value: 76.506
+    - type: mrr_at_1000
+      value: 76.517
+    - type: mrr_at_3
+      value: 74.72999999999999
+    - type: mrr_at_5
+      value: 75.65899999999999
+    - type: ndcg_at_1
+      value: 68.89699999999999
+    - type: ndcg_at_10
+      value: 79.19
+    - type: ndcg_at_100
+      value: 80.681
+    - type: ndcg_at_1000
+      value: 80.97999999999999
+    - type: ndcg_at_3
+      value: 75.954
+    - type: ndcg_at_5
+      value: 77.792
+    - type: precision_at_1
+      value: 68.89699999999999
+    - type: precision_at_10
+      value: 9.519
+    - type: precision_at_100
+      value: 1.026
+    - type: precision_at_1000
+      value: 0.105
+    - type: precision_at_3
+      value: 28.548000000000002
+    - type: precision_at_5
+      value: 18.117
+    - type: recall_at_1
+      value: 66.674
+    - type: recall_at_10
+      value: 89.55499999999999
+    - type: recall_at_100
+      value: 96.26
+    - type: recall_at_1000
+      value: 98.598
+    - type: recall_at_3
+      value: 81.029
+    - type: recall_at_5
+      value: 85.37700000000001
+  - task:
+      type: Classification
+    dataset:
+      type: mteb/amazon_massive_intent
+      name: MTEB MassiveIntentClassification (zh-CN)
+      config: zh-CN
+      split: test
+      revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7
+    metrics:
+    - type: accuracy
+      value: 68.13718897108271
+    - type: f1
+      value: 66.00508413016382
+  - task:
+      type: Classification
+    dataset:
+      type: mteb/amazon_massive_scenario
+      name: MTEB MassiveScenarioClassification (zh-CN)
+      config: zh-CN
+      split: test
+      revision: 7d571f92784cd94a019292a1f45445077d0ef634
+    metrics:
+    - type: accuracy
+      value: 72.542030934768
+    - type: f1
+      value: 71.87970959109703
+  - task:
+      type: Retrieval
+    dataset:
+      type: C-MTEB/MedicalRetrieval
+      name: MTEB MedicalRetrieval
+      config: default
+      split: dev
+      revision: None
+    metrics:
+    - type: map_at_1
+      value: 51.2
+    - type: map_at_10
+      value: 57.211999999999996
+    - type: map_at_100
+      value: 57.74
+    - type: map_at_1000
+      value: 57.791000000000004
+    - type: map_at_3
+      value: 55.900000000000006
+    - type: map_at_5
+      value: 56.665
+    - type: mrr_at_1
+      value: 51.300000000000004
+    - type: mrr_at_10
+      value: 57.252
+    - type: mrr_at_100
+      value: 57.789
+    - type: mrr_at_1000
+      value: 57.84
+    - type: mrr_at_3
+      value: 55.95
+    - type: mrr_at_5
+      value: 56.715
+    - type: ndcg_at_1
+      value: 51.2
+    - type: ndcg_at_10
+      value: 59.998
+    - type: ndcg_at_100
+      value: 62.971999999999994
+    - type: ndcg_at_1000
+      value: 64.453
+    - type: ndcg_at_3
+      value: 57.321
+    - type: ndcg_at_5
+      value: 58.711
+    - type: precision_at_1
+      value: 51.2
+    - type: precision_at_10
+      value: 6.87
+    - type: precision_at_100
+      value: 0.835
+    - type: precision_at_1000
+      value: 0.095
+    - type: precision_at_3
+      value: 20.467
+    - type: precision_at_5
+      value: 12.959999999999999
+    - type: recall_at_1
+      value: 51.2
+    - type: recall_at_10
+      value: 68.7
+    - type: recall_at_100
+      value: 83.5
+    - type: recall_at_1000
+      value: 95.39999999999999
+    - type: recall_at_3
+      value: 61.4
+    - type: recall_at_5
+      value: 64.8
+  - task:
+      type: Classification
+    dataset:
+      type: C-MTEB/MultilingualSentiment-classification
+      name: MTEB MultilingualSentiment
+      config: default
+      split: validation
+      revision: None
+    metrics:
+    - type: accuracy
+      value: 73.33000000000001
+    - type: f1
+      value: 72.76740880461465
+  - task:
+      type: PairClassification
+    dataset:
+      type: C-MTEB/OCNLI
+      name: MTEB Ocnli
+      config: default
+      split: validation
+      revision: None
+    metrics:
+    - type: cos_sim_accuracy
+      value: 75.09474824038982
+    - type: cos_sim_ap
+      value: 79.49093167837522
+    - type: cos_sim_f1
+      value: 77.762619372442
+    - type: cos_sim_precision
+      value: 68.29073482428115
+    - type: cos_sim_recall
+      value: 90.28511087645195
+    - type: dot_accuracy
+      value: 75.09474824038982
+    - type: dot_ap
+      value: 79.49093167837522
+    - type: dot_f1
+      value: 77.762619372442
+    - type: dot_precision
+      value: 68.29073482428115
+    - type: dot_recall
+      value: 90.28511087645195
+    - type: euclidean_accuracy
+      value: 75.09474824038982
+    - type: euclidean_ap
+      value: 79.49093167837522
+    - type: euclidean_f1
+      value: 77.762619372442
+    - type: euclidean_precision
+      value: 68.29073482428115
+    - type: euclidean_recall
+      value: 90.28511087645195
+    - type: manhattan_accuracy
+      value: 74.93232268543584
+    - type: manhattan_ap
+      value: 79.50256779527038
+    - type: manhattan_f1
+      value: 77.3749426342359
+    - type: manhattan_precision
+      value: 68.42532467532467
+    - type: manhattan_recall
+      value: 89.01795142555439
+    - type: max_accuracy
+      value: 75.09474824038982
+    - type: max_ap
+      value: 79.50256779527038
+    - type: max_f1
+      value: 77.762619372442
+  - task:
+      type: Classification
+    dataset:
+      type: C-MTEB/OnlineShopping-classification
+      name: MTEB OnlineShopping
+      config: default
+      split: test
+      revision: None
+    metrics:
+    - type: accuracy
+      value: 91.71
+    - type: ap
+      value: 89.30664330630684
+    - type: f1
+      value: 91.69380669543091
+  - task:
+      type: STS
+    dataset:
+      type: C-MTEB/PAWSX
+      name: MTEB PAWSX
+      config: default
+      split: test
+      revision: None
+    metrics:
+    - type: cos_sim_pearson
+      value: 27.87844586552044
+    - type: cos_sim_spearman
+      value: 33.55828345961726
+    - type: euclidean_pearson
+      value: 34.008422591348754
+    - type: euclidean_spearman
+      value: 33.55828173553759
+    - type: manhattan_pearson
+      value: 33.97354762221951
+    - type: manhattan_spearman
+      value: 33.55061748217219
+  - task:
+      type: STS
+    dataset:
+      type: C-MTEB/QBQTC
+      name: MTEB QBQTC
+      config: default
+      split: test
+      revision: None
+    metrics:
+    - type: cos_sim_pearson
+      value: 37.16475906990342
+    - type: cos_sim_spearman
+      value: 39.02023124990304
+    - type: euclidean_pearson
+      value: 37.12905621621282
+    - type: euclidean_spearman
+      value: 39.02017798495793
+    - type: manhattan_pearson
+      value: 37.16400100601629
+    - type: manhattan_spearman
+      value: 39.027383935772335
+  - task:
+      type: STS
+    dataset:
+      type: mteb/sts22-crosslingual-sts
+      name: MTEB STS22 (zh)
+      config: zh
+      split: test
+      revision: 6d1ba47164174a496b7fa5d3569dae26a6813b80
+    metrics:
+    - type: cos_sim_pearson
+      value: 66.7431509369159
+    - type: cos_sim_spearman
+      value: 69.10355047922879
+    - type: euclidean_pearson
+      value: 67.48723360063258
+    - type: euclidean_spearman
+      value: 69.10355047922879
+    - type: manhattan_pearson
+      value: 67.55981324291854
+    - type: manhattan_spearman
+      value: 69.1816947077302
+  - task:
+      type: STS
+    dataset:
+      type: C-MTEB/STSB
+      name: MTEB STSB
+      config: default
+      split: test
+      revision: None
+    metrics:
+    - type: cos_sim_pearson
+      value: 78.27412453529412
+    - type: cos_sim_spearman
+      value: 78.74292565872022
+    - type: euclidean_pearson
+      value: 77.95359390335884
+    - type: euclidean_spearman
+      value: 78.7428438579602
+    - type: manhattan_pearson
+      value: 77.99252788851469
+    - type: manhattan_spearman
+      value: 78.80401873296358
+  - task:
+      type: Reranking
+    dataset:
+      type: C-MTEB/T2Reranking
+      name: MTEB T2Reranking
+      config: default
+      split: dev
+      revision: None
+    metrics:
+    - type: map
+      value: 66.42334440897298
+    - type: mrr
+      value: 76.24570128209263
+  - task:
+      type: Retrieval
+    dataset:
+      type: C-MTEB/T2Retrieval
+      name: MTEB T2Retrieval
+      config: default
+      split: dev
+      revision: None
+    metrics:
+    - type: map_at_1
+      value: 27.323999999999998
+    - type: map_at_10
+      value: 76.752
+    - type: map_at_100
+      value: 80.39
+    - type: map_at_1000
+      value: 80.457
+    - type: map_at_3
+      value: 53.93
+    - type: map_at_5
+      value: 66.263
+    - type: mrr_at_1
+      value: 89.90899999999999
+    - type: mrr_at_10
+      value: 92.35
+    - type: mrr_at_100
+      value: 92.43599999999999
+    - type: mrr_at_1000
+      value: 92.44
+    - type: mrr_at_3
+      value: 91.92
+    - type: mrr_at_5
+      value: 92.192
+    - type: ndcg_at_1
+      value: 89.90899999999999
+    - type: ndcg_at_10
+      value: 84.352
+    - type: ndcg_at_100
+      value: 87.978
+    - type: ndcg_at_1000
+      value: 88.631
+    - type: ndcg_at_3
+      value: 85.845
+    - type: ndcg_at_5
+      value: 84.35000000000001
+    - type: precision_at_1
+      value: 89.90899999999999
+    - type: precision_at_10
+      value: 41.985
+    - type: precision_at_100
+      value: 5.007000000000001
+    - type: precision_at_1000
+      value: 0.516
+    - type: precision_at_3
+      value: 75.146
+    - type: precision_at_5
+      value: 62.92100000000001
+    - type: recall_at_1
+      value: 27.323999999999998
+    - type: recall_at_10
+      value: 83.221
+    - type: recall_at_100
+      value: 95.088
+    - type: recall_at_1000
+      value: 98.436
+    - type: recall_at_3
+      value: 55.58
+    - type: recall_at_5
+      value: 69.594
+  - task:
+      type: Classification
+    dataset:
+      type: C-MTEB/TNews-classification
+      name: MTEB TNews
+      config: default
+      split: validation
+      revision: None
+    metrics:
+    - type: accuracy
+      value: 50.453
+    - type: f1
+      value: 48.736715267813835
+  - task:
+      type: Clustering
+    dataset:
+      type: C-MTEB/ThuNewsClusteringP2P
+      name: MTEB ThuNewsClusteringP2P
+      config: default
+      split: test
+      revision: None
+    metrics:
+    - type: v_measure
+      value: 59.153574405500706
+  - task:
+      type: Clustering
+    dataset:
+      type: C-MTEB/ThuNewsClusteringS2S
+      name: MTEB ThuNewsClusteringS2S
+      config: default
+      split: test
+      revision: None
+    metrics:
+    - type: v_measure
+      value: 52.79421409479782
+  - task:
+      type: Retrieval
+    dataset:
+      type: C-MTEB/VideoRetrieval
+      name: MTEB VideoRetrieval
+      config: default
+      split: dev
+      revision: None
+    metrics:
+    - type: map_at_1
+      value: 56.699999999999996
+    - type: map_at_10
+      value: 66.834
+    - type: map_at_100
+      value: 67.313
+    - type: map_at_1000
+      value: 67.325
+    - type: map_at_3
+      value: 65.017
+    - type: map_at_5
+      value: 65.927
+    - type: mrr_at_1
+      value: 56.699999999999996
+    - type: mrr_at_10
+      value: 66.834
+    - type: mrr_at_100
+      value: 67.313
+    - type: mrr_at_1000
+      value: 67.325
+    - type: mrr_at_3
+      value: 65.017
+    - type: mrr_at_5
+      value: 65.927
+    - type: ndcg_at_1
+      value: 56.699999999999996
+    - type: ndcg_at_10
+      value: 71.576
+    - type: ndcg_at_100
+      value: 73.79400000000001
+    - type: ndcg_at_1000
+      value: 74.08200000000001
+    - type: ndcg_at_3
+      value: 67.73400000000001
+    - type: ndcg_at_5
+      value: 69.378
+    - type: precision_at_1
+      value: 56.699999999999996
+    - type: precision_at_10
+      value: 8.64
+    - type: precision_at_100
+      value: 0.9650000000000001
+    - type: precision_at_1000
+      value: 0.099
+    - type: precision_at_3
+      value: 25.2
+    - type: precision_at_5
+      value: 15.920000000000002
+    - type: recall_at_1
+      value: 56.699999999999996
+    - type: recall_at_10
+      value: 86.4
+    - type: recall_at_100
+      value: 96.5
+    - type: recall_at_1000
+      value: 98.7
+    - type: recall_at_3
+      value: 75.6
+    - type: recall_at_5
+      value: 79.60000000000001
+  - task:
+      type: Classification
+    dataset:
+      type: C-MTEB/waimai-classification
+      name: MTEB Waimai
+      config: default
+      split: test
+      revision: None
+    metrics:
+    - type: accuracy
+      value: 86.83
+    - type: ap
+      value: 70.2908139255317
+    - type: f1
+      value: 85.19267443803346
 ---
+An attempt at an embedding model.

added_tokens.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "[CLS]": 101,
+  "[MASK]": 103,
+  "[PAD]": 0,
+  "[SEP]": 102,
+  "[UNK]": 100
+}

config.json ADDED Viewed

	@@ -0,0 +1,35 @@

+{
+  "_name_or_path": "./model/tao-8k",
+  "architectures": [
+    "BertModel"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "classifier_dropout": null,
+  "directionality": "bidi",
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 8192,
+  "model_type": "bert",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "output_past": true,
+  "pad_token_id": 0,
+  "pooler_fc_size": 768,
+  "pooler_num_attention_heads": 12,
+  "pooler_num_fc_layers": 3,
+  "pooler_size_per_head": 128,
+  "pooler_type": "first_token_transform",
+  "position_embedding_type": "absolute",
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.34.0",
+  "type_vocab_size": 2,
+  "uniem_pooling_strategy": "last_mean",
+  "use_cache": true,
+  "vocab_size": 21128
+}

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3b165a06efd456ce41f0678aebbf579751d5504bd1ce48198b611c1ae00058e8
+size 666905321

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "additional_special_tokens": [
+    "[PAD]",
+    "[UNK]",
+    "[CLS]",
+    "[SEP]",
+    "[MASK]"
+  ],
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,62 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "[PAD]",
+    "[UNK]",
+    "[CLS]",
+    "[SEP]",
+    "[MASK]"
+  ],
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 8192,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff