Safetensors
Romanian
llama
Eval Results
mihaimasala committed
Commit 1837e89
1 Parent(s): c85bb73

Update README.md

Files changed (1)
  1. README.md +474 -473
README.md CHANGED
@@ -7,479 +7,480 @@ base_model:
  datasets:
  - OpenLLM-Ro/ro_dpo_helpsteer
  model-index:
- - name: OpenLLM-Ro/RoLlama2-7b-Instruct-DPO-2024-10-09
-   results:
-   - task:
-       type: text-generation
-     dataset:
-       name: RoMT-Bench
-       type: RoMT-Bench
-     metrics:
-     - name: Score
-       type: Score
-       value: 4.61
-   - task:
-       type: text-generation
-     dataset:
-       name: RoCulturaBench
-       type: RoCulturaBench
-     metrics:
-     - name: Score
-       type: Score
-       value: 4.8
-   - task:
-       type: text-generation
-     dataset:
-       name: Romanian_Academic_Benchmarks
-       type: Romanian_Academic_Benchmarks
-     metrics:
-     - name: Average accuracy
-       type: accuracy
-       value: 43.2
-   - task:
-       type: text-generation
-     dataset:
-       name: OpenLLM-Ro/ro_arc_challenge
-       type: OpenLLM-Ro/ro_arc_challenge
-     metrics:
-     - name: Average accuracy
-       type: accuracy
-       value: 44.24
-   - task:
-       type: text-generation
-     dataset:
-       name: OpenLLM-Ro/ro_mmlu
-       type: OpenLLM-Ro/ro_mmlu
-     metrics:
-     - name: Average accuracy
-       type: accuracy
-       value: 38.39
-   - task:
-       type: text-generation
-     dataset:
-       name: OpenLLM-Ro/ro_winogrande
-       type: OpenLLM-Ro/ro_winogrande
-     metrics:
-     - name: Average accuracy
-       type: accuracy
-       value: 62.57
-   - task:
-       type: text-generation
-     dataset:
-       name: OpenLLM-Ro/ro_hellaswag
-       type: OpenLLM-Ro/ro_hellaswag
-     metrics:
-     - name: Average accuracy
-       type: accuracy
-       value: 59.2
-   - task:
-       type: text-generation
-     dataset:
-       name: OpenLLM-Ro/ro_gsm8k
-       type: OpenLLM-Ro/ro_gsm8k
-     metrics:
-     - name: Average accuracy
-       type: accuracy
-       value: 15.72
-   - task:
-       type: text-generation
-     dataset:
-       name: OpenLLM-Ro/ro_truthfulqa
-       type: OpenLLM-Ro/ro_truthfulqa
-     metrics:
-     - name: Average accuracy
-       type: accuracy
-       value: 39.07
-   - task:
-       type: text-generation
-     dataset:
-       name: LaRoSeDa_binary
-       type: LaRoSeDa_binary
-     metrics:
-     - name: Average macro-f1
-       type: macro-f1
-       value: 97.31
-   - task:
-       type: text-generation
-     dataset:
-       name: LaRoSeDa_multiclass
-       type: LaRoSeDa_multiclass
-     metrics:
-     - name: Average macro-f1
-       type: macro-f1
-       value: 60.56
-   - task:
-       type: text-generation
-     dataset:
-       name: LaRoSeDa_binary_finetuned
-       type: LaRoSeDa_binary_finetuned
-     metrics:
-     - name: Average macro-f1
-       type: macro-f1
-       value: 0
-   - task:
-       type: text-generation
-     dataset:
-       name: LaRoSeDa_multiclass_finetuned
-       type: LaRoSeDa_multiclass_finetuned
-     metrics:
-     - name: Average macro-f1
-       type: macro-f1
-       value: 0
-   - task:
-       type: text-generation
-     dataset:
-       name: WMT_EN-RO
-       type: WMT_EN-RO
-     metrics:
-     - name: Average bleu
-       type: bleu
-       value: 26.56
-   - task:
-       type: text-generation
-     dataset:
-       name: WMT_RO-EN
-       type: WMT_RO-EN
-     metrics:
-     - name: Average bleu
-       type: bleu
-       value: 21.68
-   - task:
-       type: text-generation
-     dataset:
-       name: WMT_EN-RO_finetuned
-       type: WMT_EN-RO_finetuned
-     metrics:
-     - name: Average bleu
-       type: bleu
-       value: 0
-   - task:
-       type: text-generation
-     dataset:
-       name: WMT_RO-EN_finetuned
-       type: WMT_RO-EN_finetuned
-     metrics:
-     - name: Average bleu
-       type: bleu
-       value: 0
-   - task:
-       type: text-generation
-     dataset:
-       name: XQuAD
-       type: XQuAD
-     metrics:
-     - name: Average exact_match
-       type: exact_match
-       value: 35.78
-   - task:
-       type: text-generation
-     dataset:
-       name: XQuAD
-       type: XQuAD
-     metrics:
-     - name: Average f1
-       type: f1
-       value: 59.31
-   - task:
-       type: text-generation
-     dataset:
-       name: XQuAD_finetuned
-       type: XQuAD_finetuned
-     metrics:
-     - name: Average exact_match
-       type: exact_match
-       value: 0
-   - task:
-       type: text-generation
-     dataset:
-       name: XQuAD_finetuned
-       type: XQuAD_finetuned
-     metrics:
-     - name: Average f1
-       type: f1
-       value: 0
-   - task:
-       type: text-generation
-     dataset:
-       name: STS
-       type: STS
-     metrics:
-     - name: Average spearman
-       type: spearman
-       value: 61.22
-   - task:
-       type: text-generation
-     dataset:
-       name: STS
-       type: STS
-     metrics:
-     - name: Average pearson
-       type: pearson
-       value: 58.41
-   - task:
-       type: text-generation
-     dataset:
-       name: STS_finetuned
-       type: STS_finetuned
-     metrics:
-     - name: Average spearman
-       type: spearman
-       value: 0
-   - task:
-       type: text-generation
-     dataset:
-       name: STS_finetuned
-       type: STS_finetuned
-     metrics:
-     - name: Average pearson
-       type: pearson
-       value: 0
-   - task:
-       type: text-generation
-     dataset:
-       name: RoMT-Bench
-       type: RoMT-Bench
-     metrics:
-     - name: First turn
-       type: Score
-       value: 5.15
-     - name: Second turn
-       type: Score
-       value: 4.06
-   - task:
-       type: text-generation
-     dataset:
-       name: OpenLLM-Ro/ro_arc_challenge
-       type: OpenLLM-Ro/ro_arc_challenge
-     metrics:
-     - name: 0-shot
-       type: accuracy
-       value: 42.67
-     - name: 1-shot
-       type: accuracy
-       value: 43.36
-     - name: 3-shot
-       type: accuracy
-       value: 44.13
-     - name: 5-shot
-       type: accuracy
-       value: 44.3
-     - name: 10-shot
-       type: accuracy
-       value: 45.67
-     - name: 25-shot
-       type: accuracy
-       value: 45.33
-   - task:
-       type: text-generation
-     dataset:
-       name: OpenLLM-Ro/ro_mmlu
-       type: OpenLLM-Ro/ro_mmlu
-     metrics:
-     - name: 0-shot
-       type: accuracy
-       value: 36.62
-     - name: 1-shot
-       type: accuracy
-       value: 38.04
-     - name: 3-shot
-       type: accuracy
-       value: 39.52
-     - name: 5-shot
-       type: accuracy
-       value: 39.36
-   - task:
-       type: text-generation
-     dataset:
-       name: OpenLLM-Ro/ro_winogrande
-       type: OpenLLM-Ro/ro_winogrande
-     metrics:
-     - name: 0-shot
-       type: accuracy
-       value: 61.72
-     - name: 1-shot
-       type: accuracy
-       value: 62.04
-     - name: 3-shot
-       type: accuracy
-       value: 63.85
-     - name: 5-shot
-       type: accuracy
-       value: 62.67
-   - task:
-       type: text-generation
-     dataset:
-       name: OpenLLM-Ro/ro_hellaswag
-       type: OpenLLM-Ro/ro_hellaswag
-     metrics:
-     - name: 0-shot
-       type: accuracy
-       value: 58.75
-     - name: 1-shot
-       type: accuracy
-       value: 58.29
-     - name: 3-shot
-       type: accuracy
-       value: 59.28
-     - name: 5-shot
-       type: accuracy
-       value: 59.68
-     - name: 10-shot
-       type: accuracy
-       value: 60.01
-   - task:
-       type: text-generation
-     dataset:
-       name: OpenLLM-Ro/ro_gsm8k
-       type: OpenLLM-Ro/ro_gsm8k
-     metrics:
-     - name: 0-shot
-       type: accuracy
-       value: 11.14
-     - name: 1-shot
-       type: accuracy
-       value: 17.97
-     - name: 3-shot
-       type: accuracy
-       value: 18.04
-   - task:
-       type: text-generation
-     dataset:
-       name: LaRoSeDa_binary
-       type: LaRoSeDa_binary
-     metrics:
-     - name: 0-shot
-       type: macro-f1
-       value: 98.03
-     - name: 1-shot
-       type: macro-f1
-       value: 95.96
-     - name: 3-shot
-       type: macro-f1
-       value: 97.33
-     - name: 5-shot
-       type: macro-f1
-       value: 97.9
-   - task:
-       type: text-generation
-     dataset:
-       name: LaRoSeDa_multiclass
-       type: LaRoSeDa_multiclass
-     metrics:
-     - name: 0-shot
-       type: macro-f1
-       value: 60.67
-     - name: 1-shot
-       type: macro-f1
-       value: 51.37
-     - name: 3-shot
-       type: macro-f1
-       value: 62.49
-     - name: 5-shot
-       type: macro-f1
-       value: 67.7
-   - task:
-       type: text-generation
-     dataset:
-       name: WMT_EN-RO
-       type: WMT_EN-RO
-     metrics:
-     - name: 0-shot
-       type: bleu
-       value: 19.83
-     - name: 1-shot
-       type: bleu
-       value: 29.04
-     - name: 3-shot
-       type: bleu
-       value: 28.9
-     - name: 5-shot
-       type: bleu
-       value: 28.47
-   - task:
-       type: text-generation
-     dataset:
-       name: WMT_RO-EN
-       type: WMT_RO-EN
-     metrics:
-     - name: 0-shot
-       type: bleu
-       value: 1.74
-     - name: 1-shot
-       type: bleu
-       value: 15.28
-     - name: 3-shot
-       type: bleu
-       value: 34.13
-     - name: 5-shot
-       type: bleu
-       value: 35.56
-   - task:
-       type: text-generation
-     dataset:
-       name: XQuAD_EM
-       type: XQuAD_EM
-     metrics:
-     - name: 0-shot
-       type: exact_match
-       value: 26.97
-     - name: 1-shot
-       type: exact_match
-       value: 36.3
-     - name: 3-shot
-       type: exact_match
-       value: 40.25
-     - name: 5-shot
-       type: exact_match
-       value: 39.58
-   - task:
-       type: text-generation
-     dataset:
-       name: XQuAD_F1
-       type: XQuAD_F1
-     metrics:
-     - name: 0-shot
-       type: f1
-       value: 52.9
-     - name: 1-shot
-       type: f1
-       value: 60.05
-     - name: 3-shot
-       type: f1
-       value: 62.08
-     - name: 5-shot
-       type: f1
-       value: 62.22
-   - task:
-       type: text-generation
-     dataset:
-       name: STS
-       type: STS
-     metrics:
-     - name: 0-shot
-       type: spearman
-       value: 62.07
-     - name: 1-shot
-       type: spearman
-       value: 59.47
-     - name: 3-shot
-       type: spearman
-       value: 62.12
-   - task:
-       type: text-generation
-     dataset:
-       name: STS
-       type: STS
-     metrics:
-     - name: 0-shot
-       type: pearson
-       value: 60.6
-     - name: 1-shot
-       type: pearson
-       value: 56.44
-     - name: 3-shot
-       type: pearson
-       value: 58.18
+ - name: OpenLLM-Ro/RoLlama2-7b-Instruct-DPO-2024-10-09
+   results:
+   - task:
+       type: text-generation
+     dataset:
+       name: RoMT-Bench
+       type: RoMT-Bench
+     metrics:
+     - name: Score
+       type: Score
+       value: 4.61
+   - task:
+       type: text-generation
+     dataset:
+       name: RoCulturaBench
+       type: RoCulturaBench
+     metrics:
+     - name: Score
+       type: Score
+       value: 4.80
+   - task:
+       type: text-generation
+     dataset:
+       name: Romanian_Academic_Benchmarks
+       type: Romanian_Academic_Benchmarks
+     metrics:
+     - name: Average accuracy
+       type: accuracy
+       value: 43.20
+   - task:
+       type: text-generation
+     dataset:
+       name: OpenLLM-Ro/ro_arc_challenge
+       type: OpenLLM-Ro/ro_arc_challenge
+     metrics:
+     - name: Average accuracy
+       type: accuracy
+       value: 44.24
+   - task:
+       type: text-generation
+     dataset:
+       name: OpenLLM-Ro/ro_mmlu
+       type: OpenLLM-Ro/ro_mmlu
+     metrics:
+     - name: Average accuracy
+       type: accuracy
+       value: 38.39
+   - task:
+       type: text-generation
+     dataset:
+       name: OpenLLM-Ro/ro_winogrande
+       type: OpenLLM-Ro/ro_winogrande
+     metrics:
+     - name: Average accuracy
+       type: accuracy
+       value: 62.57
+   - task:
+       type: text-generation
+     dataset:
+       name: OpenLLM-Ro/ro_hellaswag
+       type: OpenLLM-Ro/ro_hellaswag
+     metrics:
+     - name: Average accuracy
+       type: accuracy
+       value: 59.20
+   - task:
+       type: text-generation
+     dataset:
+       name: OpenLLM-Ro/ro_gsm8k
+       type: OpenLLM-Ro/ro_gsm8k
+     metrics:
+     - name: Average accuracy
+       type: accuracy
+       value: 15.72
+   - task:
+       type: text-generation
+     dataset:
+       name: OpenLLM-Ro/ro_truthfulqa
+       type: OpenLLM-Ro/ro_truthfulqa
+     metrics:
+     - name: Average accuracy
+       type: accuracy
+       value: 39.07
+   - task:
+       type: text-generation
+     dataset:
+       name: LaRoSeDa_binary
+       type: LaRoSeDa_binary
+     metrics:
+     - name: Average macro-f1
+       type: macro-f1
+       value: 97.31
+   - task:
+       type: text-generation
+     dataset:
+       name: LaRoSeDa_multiclass
+       type: LaRoSeDa_multiclass
+     metrics:
+     - name: Average macro-f1
+       type: macro-f1
+       value: 60.56
+   - task:
+       type: text-generation
+     dataset:
+       name: LaRoSeDa_binary_finetuned
+       type: LaRoSeDa_binary_finetuned
+     metrics:
+     - name: Average macro-f1
+       type: macro-f1
+       value: 0.00
+   - task:
+       type: text-generation
+     dataset:
+       name: LaRoSeDa_multiclass_finetuned
+       type: LaRoSeDa_multiclass_finetuned
+     metrics:
+     - name: Average macro-f1
+       type: macro-f1
+       value: 0.00
+   - task:
+       type: text-generation
+     dataset:
+       name: WMT_EN-RO
+       type: WMT_EN-RO
+     metrics:
+     - name: Average bleu
+       type: bleu
+       value: 26.56
+   - task:
+       type: text-generation
+     dataset:
+       name: WMT_RO-EN
+       type: WMT_RO-EN
+     metrics:
+     - name: Average bleu
+       type: bleu
+       value: 21.68
+   - task:
+       type: text-generation
+     dataset:
+       name: WMT_EN-RO_finetuned
+       type: WMT_EN-RO_finetuned
+     metrics:
+     - name: Average bleu
+       type: bleu
+       value: 0.00
+   - task:
+       type: text-generation
+     dataset:
+       name: WMT_RO-EN_finetuned
+       type: WMT_RO-EN_finetuned
+     metrics:
+     - name: Average bleu
+       type: bleu
+       value: 0.00
+   - task:
+       type: text-generation
+     dataset:
+       name: XQuAD
+       type: XQuAD
+     metrics:
+     - name: Average exact_match
+       type: exact_match
+       value: 35.78
+   - task:
+       type: text-generation
+     dataset:
+       name: XQuAD
+       type: XQuAD
+     metrics:
+     - name: Average f1
+       type: f1
+       value: 59.31
+   - task:
+       type: text-generation
+     dataset:
+       name: XQuAD_finetuned
+       type: XQuAD_finetuned
+     metrics:
+     - name: Average exact_match
+       type: exact_match
+       value: 0.00
+   - task:
+       type: text-generation
+     dataset:
+       name: XQuAD_finetuned
+       type: XQuAD_finetuned
+     metrics:
+     - name: Average f1
+       type: f1
+       value: 0.00
+   - task:
+       type: text-generation
+     dataset:
+       name: STS
+       type: STS
+     metrics:
+     - name: Average spearman
+       type: spearman
+       value: 61.22
+   - task:
+       type: text-generation
+     dataset:
+       name: STS
+       type: STS
+     metrics:
+     - name: Average pearson
+       type: pearson
+       value: 58.41
+   - task:
+       type: text-generation
+     dataset:
+       name: STS_finetuned
+       type: STS_finetuned
+     metrics:
+     - name: Average spearman
+       type: spearman
+       value: 0.00
+   - task:
+       type: text-generation
+     dataset:
+       name: STS_finetuned
+       type: STS_finetuned
+     metrics:
+     - name: Average pearson
+       type: pearson
+       value: 0.00
+   - task:
+       type: text-generation
+     dataset:
+       name: RoMT-Bench
+       type: RoMT-Bench
+     metrics:
+     - name: First turn
+       type: Score
+       value: 5.15
+     - name: Second turn
+       type: Score
+       value: 4.06
+   - task:
+       type: text-generation
+     dataset:
+       name: OpenLLM-Ro/ro_arc_challenge
+       type: OpenLLM-Ro/ro_arc_challenge
+     metrics:
+     - name: 0-shot
+       type: accuracy
+       value: 42.67
+     - name: 1-shot
+       type: accuracy
+       value: 43.36
+     - name: 3-shot
+       type: accuracy
+       value: 44.13
+     - name: 5-shot
+       type: accuracy
+       value: 44.30
+     - name: 10-shot
+       type: accuracy
+       value: 45.67
+     - name: 25-shot
+       type: accuracy
+       value: 45.33
+   - task:
+       type: text-generation
+     dataset:
+       name: OpenLLM-Ro/ro_mmlu
+       type: OpenLLM-Ro/ro_mmlu
+     metrics:
+     - name: 0-shot
+       type: accuracy
+       value: 36.62
+     - name: 1-shot
+       type: accuracy
+       value: 38.04
+     - name: 3-shot
+       type: accuracy
+       value: 39.52
+     - name: 5-shot
+       type: accuracy
+       value: 39.36
+   - task:
+       type: text-generation
+     dataset:
+       name: OpenLLM-Ro/ro_winogrande
+       type: OpenLLM-Ro/ro_winogrande
+     metrics:
+     - name: 0-shot
+       type: accuracy
+       value: 61.72
+     - name: 1-shot
+       type: accuracy
+       value: 62.04
+     - name: 3-shot
+       type: accuracy
+       value: 63.85
+     - name: 5-shot
+       type: accuracy
+       value: 62.67
+   - task:
+       type: text-generation
+     dataset:
+       name: OpenLLM-Ro/ro_hellaswag
+       type: OpenLLM-Ro/ro_hellaswag
+     metrics:
+     - name: 0-shot
+       type: accuracy
+       value: 58.75
+     - name: 1-shot
+       type: accuracy
+       value: 58.29
+     - name: 3-shot
+       type: accuracy
+       value: 59.28
+     - name: 5-shot
+       type: accuracy
+       value: 59.68
+     - name: 10-shot
+       type: accuracy
+       value: 60.01
+   - task:
+       type: text-generation
+     dataset:
+       name: OpenLLM-Ro/ro_gsm8k
+       type: OpenLLM-Ro/ro_gsm8k
+     metrics:
+     - name: 0-shot
+       type: accuracy
+       value: 11.14
+     - name: 1-shot
+       type: accuracy
+       value: 17.97
+     - name: 3-shot
+       type: accuracy
+       value: 18.04
+   - task:
+       type: text-generation
+     dataset:
+       name: LaRoSeDa_binary
+       type: LaRoSeDa_binary
+     metrics:
+     - name: 0-shot
+       type: macro-f1
+       value: 98.03
+     - name: 1-shot
+       type: macro-f1
+       value: 95.96
+     - name: 3-shot
+       type: macro-f1
+       value: 97.33
+     - name: 5-shot
+       type: macro-f1
+       value: 97.90
+   - task:
+       type: text-generation
+     dataset:
+       name: LaRoSeDa_multiclass
+       type: LaRoSeDa_multiclass
+     metrics:
+     - name: 0-shot
+       type: macro-f1
+       value: 60.67
+     - name: 1-shot
+       type: macro-f1
+       value: 51.37
+     - name: 3-shot
+       type: macro-f1
+       value: 62.49
+     - name: 5-shot
+       type: macro-f1
+       value: 67.70
+   - task:
+       type: text-generation
+     dataset:
+       name: WMT_EN-RO
+       type: WMT_EN-RO
+     metrics:
+     - name: 0-shot
+       type: bleu
+       value: 19.83
+     - name: 1-shot
+       type: bleu
+       value: 29.04
+     - name: 3-shot
+       type: bleu
+       value: 28.90
+     - name: 5-shot
+       type: bleu
+       value: 28.47
+   - task:
+       type: text-generation
+     dataset:
+       name: WMT_RO-EN
+       type: WMT_RO-EN
+     metrics:
+     - name: 0-shot
+       type: bleu
+       value: 1.74
+     - name: 1-shot
+       type: bleu
+       value: 15.28
+     - name: 3-shot
+       type: bleu
+       value: 34.13
+     - name: 5-shot
+       type: bleu
+       value: 35.56
+   - task:
+       type: text-generation
+     dataset:
+       name: XQuAD_EM
+       type: XQuAD_EM
+     metrics:
+     - name: 0-shot
+       type: exact_match
+       value: 26.97
+     - name: 1-shot
+       type: exact_match
+       value: 36.30
+     - name: 3-shot
+       type: exact_match
+       value: 40.25
+     - name: 5-shot
+       type: exact_match
+       value: 39.58
+   - task:
+       type: text-generation
+     dataset:
+       name: XQuAD_F1
+       type: XQuAD_F1
+     metrics:
+     - name: 0-shot
+       type: f1
+       value: 52.90
+     - name: 1-shot
+       type: f1
+       value: 60.05
+     - name: 3-shot
+       type: f1
+       value: 62.08
+     - name: 5-shot
+       type: f1
+       value: 62.22
+   - task:
+       type: text-generation
+     dataset:
+       name: STS_Spearman
+       type: STS_Spearman
+     metrics:
+     - name: 1-shot
+       type: spearman
+       value: 62.07
+     - name: 3-shot
+       type: spearman
+       value: 59.47
+     - name: 5-shot
+       type: spearman
+       value: 62.12
+   - task:
+       type: text-generation
+     dataset:
+       name: STS_Pearson
+       type: STS_Pearson
+     metrics:
+     - name: 1-shot
+       type: pearson
+       value: 60.60
+     - name: 3-shot
+       type: pearson
+       value: 56.44
+     - name: 5-shot
+       type: pearson
+       value: 58.18
+
  ---

  # Model Card for Model ID
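
The model-index block edited above is plain YAML front matter, so the reported scores can be read back programmatically. A minimal sketch (not part of this commit; assumes PyYAML is installed and a local copy of the README whose front matter is the first `---`-delimited block):

```python
# Read the model-index metrics out of a model card's YAML front matter.
import yaml  # pip install pyyaml

with open("README.md", encoding="utf-8") as f:
    text = f.read()

# The front matter sits between the first two "---" delimiters.
_, front_matter, _ = text.split("---", 2)
meta = yaml.safe_load(front_matter)

for model in meta["model-index"]:
    print(model["name"])
    for result in model["results"]:
        dataset = result["dataset"]["name"]
        for metric in result["metrics"]:
            print(f"  {dataset}: {metric['name']} = {metric['value']}")
```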