PyTorch
Romanian
llama
Eval Results
mihaimasala committed
Commit f2b05bc
1 Parent(s): acea762

Update README.md

Files changed (1)
  1. README.md +445 -444
README.md CHANGED
@@ -3,452 +3,453 @@ license: llama2
  language:
  - ro
  base_model: meta-llama/Llama-2-7b-hf
- model-index:
- - name: OpenLLM-Ro/RoLlama2-7b-Base-2024-05-14
-   results:
-   - task:
-       type: text-generation
-     dataset:
-       name: Romanian_Academic_Benchmarks
-       type: Romanian_Academic_Benchmarks
-     metrics:
-     - name: Average accuracy
-       type: accuracy
-       value: 38.03
-   - task:
-       type: text-generation
-     dataset:
-       name: OpenLLM-Ro/ro_arc_challenge
-       type: OpenLLM-Ro/ro_arc_challenge
-     metrics:
-     - name: Average accuracy
-       type: accuracy
-       value: 37.95
-   - task:
-       type: text-generation
-     dataset:
-       name: OpenLLM-Ro/ro_mmlu
-       type: OpenLLM-Ro/ro_mmlu
-     metrics:
-     - name: Average accuracy
-       type: accuracy
-       value: 27.22
-   - task:
-       type: text-generation
-     dataset:
-       name: OpenLLM-Ro/ro_winogrande
-       type: OpenLLM-Ro/ro_winogrande
-     metrics:
-     - name: Average accuracy
-       type: accuracy
-       value: 59.29
-   - task:
-       type: text-generation
-     dataset:
-       name: OpenLLM-Ro/ro_hellaswag
-       type: OpenLLM-Ro/ro_hellaswag
-     metrics:
-     - name: Average accuracy
-       type: accuracy
-       value: 57.22
-   - task:
-       type: text-generation
-     dataset:
-       name: OpenLLM-Ro/ro_gsm8k
-       type: OpenLLM-Ro/ro_gsm8k
-     metrics:
-     - name: Average accuracy
-       type: accuracy
-       value: 2.53
-   - task:
-       type: text-generation
-     dataset:
-       name: OpenLLM-Ro/ro_truthfulqa
-       type: OpenLLM-Ro/ro_truthfulqa
-     metrics:
-     - name: Average accuracy
-       type: accuracy
-       value: 44
-   - task:
-       type: text-generation
-     dataset:
-       name: LaRoSeDa_binary
-       type: LaRoSeDa_binary
-     metrics:
-     - name: Average macro-f1
-       type: macro-f1
-       value: 83.25
-   - task:
-       type: text-generation
-     dataset:
-       name: LaRoSeDa_multiclass
-       type: LaRoSeDa_multiclass
-     metrics:
-     - name: Average macro-f1
-       type: macro-f1
-       value: 61.04
-   - task:
-       type: text-generation
-     dataset:
-       name: LaRoSeDa_binary_finetuned
-       type: LaRoSeDa_binary_finetuned
-     metrics:
-     - name: Average macro-f1
-       type: macro-f1
-       value: 98.97
-   - task:
-       type: text-generation
-     dataset:
-       name: LaRoSeDa_multiclass_finetuned
-       type: LaRoSeDa_multiclass_finetuned
-     metrics:
-     - name: Average macro-f1
-       type: macro-f1
-       value: 87.72
-   - task:
-       type: text-generation
-     dataset:
-       name: WMT_EN-RO
-       type: WMT_EN-RO
-     metrics:
-     - name: Average bleu
-       type: bleu
-       value: 10.01
-   - task:
-       type: text-generation
-     dataset:
-       name: WMT_RO-EN
-       type: WMT_RO-EN
-     metrics:
-     - name: Average bleu
-       type: bleu
-       value: 13.03
-   - task:
-       type: text-generation
-     dataset:
-       name: WMT_EN-RO_finetuned
-       type: WMT_EN-RO_finetuned
-     metrics:
-     - name: Average bleu
-       type: bleu
-       value: 27.85
-   - task:
-       type: text-generation
-     dataset:
-       name: WMT_RO-EN_finetuned
-       type: WMT_RO-EN_finetuned
-     metrics:
-     - name: Average bleu
-       type: bleu
-       value: 39.3
-   - task:
-       type: text-generation
-     dataset:
-       name: XQuAD
-       type: XQuAD
-     metrics:
-     - name: Average exact_match
-       type: exact_match
-       value: 30.15
-   - task:
-       type: text-generation
-     dataset:
-       name: XQuAD
-       type: XQuAD
-     metrics:
-     - name: Average f1
-       type: f1
-       value: 47.03
-   - task:
-       type: text-generation
-     dataset:
-       name: XQuAD_finetuned
-       type: XQuAD_finetuned
-     metrics:
-     - name: Average exact_match
-       type: exact_match
-       value: 67.06
-   - task:
-       type: text-generation
-     dataset:
-       name: XQuAD_finetuned
-       type: XQuAD_finetuned
-     metrics:
-     - name: Average f1
-       type: f1
-       value: 79.96
-   - task:
-       type: text-generation
-     dataset:
-       name: STS
-       type: STS
-     metrics:
-     - name: Average spearman
-       type: spearman
-       value: 7.89
-   - task:
-       type: text-generation
-     dataset:
-       name: STS
-       type: STS
-     metrics:
-     - name: Average pearson
-       type: pearson
-       value: 7.98
-   - task:
-       type: text-generation
-     dataset:
-       name: STS_finetuned
-       type: STS_finetuned
-     metrics:
-     - name: Average spearman
-       type: spearman
-       value: 71.75
-   - task:
-       type: text-generation
-     dataset:
-       name: STS_finetuned
-       type: STS_finetuned
-     metrics:
-     - name: Average pearson
-       type: pearson
-       value: 71.99
-   - task:
-       type: text-generation
-     dataset:
-       name: OpenLLM-Ro/ro_arc_challenge
-       type: OpenLLM-Ro/ro_arc_challenge
-     metrics:
-     - name: 0-shot
-       type: accuracy
-       value: 35.56
-     - name: 1-shot
-       type: accuracy
-       value: 36.42
-     - name: 3-shot
-       type: accuracy
-       value: 38.56
-     - name: 5-shot
-       type: accuracy
-       value: 38.39
-     - name: 10-shot
-       type: accuracy
-       value: 39.07
-     - name: 25-shot
-       type: accuracy
-       value: 39.67
-   - task:
-       type: text-generation
-     dataset:
-       name: OpenLLM-Ro/ro_mmlu
-       type: OpenLLM-Ro/ro_mmlu
-     metrics:
-     - name: 0-shot
-       type: accuracy
-       value: 25.82
-     - name: 1-shot
-       type: accuracy
-       value: 25.48
-     - name: 3-shot
-       type: accuracy
-       value: 27.61
-     - name: 5-shot
-       type: accuracy
-       value: 29.96
-   - task:
-       type: text-generation
-     dataset:
-       name: OpenLLM-Ro/ro_winogrande
-       type: OpenLLM-Ro/ro_winogrande
-     metrics:
-     - name: 0-shot
-       type: accuracy
-       value: 58.72
-     - name: 1-shot
-       type: accuracy
-       value: 58.88
-     - name: 3-shot
-       type: accuracy
-       value: 60.38
-     - name: 5-shot
-       type: accuracy
-       value: 59.19
-   - task:
-       type: text-generation
-     dataset:
-       name: OpenLLM-Ro/ro_hellaswag
-       type: OpenLLM-Ro/ro_hellaswag
-     metrics:
-     - name: 0-shot
-       type: accuracy
-       value: 55.85
-     - name: 1-shot
-       type: accuracy
-       value: 57.06
-     - name: 3-shot
-       type: accuracy
-       value: 57.52
-     - name: 5-shot
-       type: accuracy
-       value: 57.89
-     - name: 10-shot
-       type: accuracy
-       value: 57.79
-   - task:
-       type: text-generation
-     dataset:
-       name: OpenLLM-Ro/ro_gsm8k
-       type: OpenLLM-Ro/ro_gsm8k
-     metrics:
-     - name: 0-shot
-       type: accuracy
-       value: 0
-     - name: 1-shot
-       type: accuracy
-       value: 2.96
-     - name: 3-shot
-       type: accuracy
-       value: 4.62
-   - task:
-       type: text-generation
-     dataset:
-       name: LaRoSeDa_binary
-       type: LaRoSeDa_binary
-     metrics:
-     - name: 0-shot
-       type: macro-f1
-       value: 42.78
-     - name: 1-shot
-       type: macro-f1
-       value: 98
-     - name: 3-shot
-       type: macro-f1
-       value: 95.13
-     - name: 5-shot
-       type: macro-f1
-       value: 97.07
-   - task:
-       type: text-generation
-     dataset:
-       name: LaRoSeDa_multiclass
-       type: LaRoSeDa_multiclass
-     metrics:
-     - name: 0-shot
-       type: macro-f1
-       value: 46.41
-     - name: 1-shot
-       type: macro-f1
-       value: 67.36
-     - name: 3-shot
-       type: macro-f1
-       value: 65.16
-     - name: 5-shot
-       type: macro-f1
-       value: 65.23
-   - task:
-       type: text-generation
-     dataset:
-       name: WMT_EN-RO
-       type: WMT_EN-RO
-     metrics:
-     - name: 0-shot
-       type: bleu
-       value: 4.45
-     - name: 1-shot
-       type: bleu
-       value: 8.61
-     - name: 3-shot
-       type: bleu
-       value: 12.25
-     - name: 5-shot
-       type: bleu
-       value: 14.73
-   - task:
-       type: text-generation
-     dataset:
-       name: WMT_RO-EN
-       type: WMT_RO-EN
-     metrics:
-     - name: 0-shot
-       type: bleu
-       value: 1.29
-     - name: 1-shot
-       type: bleu
-       value: 10.78
-     - name: 3-shot
-       type: bleu
-       value: 16.82
-     - name: 5-shot
-       type: bleu
-       value: 23.24
-   - task:
-       type: text-generation
-     dataset:
-       name: XQuAD_EM
-       type: XQuAD_EM
-     metrics:
-     - name: 0-shot
-       type: exact_match
-       value: 5.29
-     - name: 1-shot
-       type: exact_match
-       value: 33.95
-     - name: 3-shot
-       type: exact_match
-       value: 39.24
-     - name: 5-shot
-       type: exact_match
-       value: 42.1
-   - task:
-       type: text-generation
-     dataset:
-       name: XQuAD_F1
-       type: XQuAD_F1
-     metrics:
-     - name: 0-shot
-       type: f1
-       value: 16.17
-     - name: 1-shot
-       type: f1
-       value: 51.84
-     - name: 3-shot
-       type: f1
-       value: 58.82
-     - name: 5-shot
-       type: f1
-       value: 61.29
-   - task:
-       type: text-generation
-     dataset:
-       name: STS
-       type: STS
-     metrics:
-     - name: 0-shot
-       type: spearman
-       value: -1.74
-     - name: 1-shot
-       type: spearman
-       value: 15.47
-     - name: 3-shot
-       type: spearman
-       value: 9.93
-   - task:
-       type: text-generation
-     dataset:
-       name: STS
-       type: STS
-     metrics:
-     - name: 0-shot
-       type: pearson
-       value: -1.4
-     - name: 1-shot
-       type: pearson
-       value: 15
-     - name: 3-shot
-       type: pearson
-       value: 10.33
  datasets:
  - uonlp/CulturaX
+ model-index:
+ - name: OpenLLM-Ro/RoLlama2-7b-Base-2024-05-14
+   results:
+   - task:
+       type: text-generation
+     dataset:
+       name: Romanian_Academic_Benchmarks
+       type: Romanian_Academic_Benchmarks
+     metrics:
+     - name: Average accuracy
+       type: accuracy
+       value: 38.03
+   - task:
+       type: text-generation
+     dataset:
+       name: OpenLLM-Ro/ro_arc_challenge
+       type: OpenLLM-Ro/ro_arc_challenge
+     metrics:
+     - name: Average accuracy
+       type: accuracy
+       value: 37.95
+   - task:
+       type: text-generation
+     dataset:
+       name: OpenLLM-Ro/ro_mmlu
+       type: OpenLLM-Ro/ro_mmlu
+     metrics:
+     - name: Average accuracy
+       type: accuracy
+       value: 27.22
+   - task:
+       type: text-generation
+     dataset:
+       name: OpenLLM-Ro/ro_winogrande
+       type: OpenLLM-Ro/ro_winogrande
+     metrics:
+     - name: Average accuracy
+       type: accuracy
+       value: 59.29
+   - task:
+       type: text-generation
+     dataset:
+       name: OpenLLM-Ro/ro_hellaswag
+       type: OpenLLM-Ro/ro_hellaswag
+     metrics:
+     - name: Average accuracy
+       type: accuracy
+       value: 57.22
+   - task:
+       type: text-generation
+     dataset:
+       name: OpenLLM-Ro/ro_gsm8k
+       type: OpenLLM-Ro/ro_gsm8k
+     metrics:
+     - name: Average accuracy
+       type: accuracy
+       value: 2.53
+   - task:
+       type: text-generation
+     dataset:
+       name: OpenLLM-Ro/ro_truthfulqa
+       type: OpenLLM-Ro/ro_truthfulqa
+     metrics:
+     - name: Average accuracy
+       type: accuracy
+       value: 44.00
+   - task:
+       type: text-generation
+     dataset:
+       name: LaRoSeDa_binary
+       type: LaRoSeDa_binary
+     metrics:
+     - name: Average macro-f1
+       type: macro-f1
+       value: 83.25
+   - task:
+       type: text-generation
+     dataset:
+       name: LaRoSeDa_multiclass
+       type: LaRoSeDa_multiclass
+     metrics:
+     - name: Average macro-f1
+       type: macro-f1
+       value: 61.04
+   - task:
+       type: text-generation
+     dataset:
+       name: LaRoSeDa_binary_finetuned
+       type: LaRoSeDa_binary_finetuned
+     metrics:
+     - name: Average macro-f1
+       type: macro-f1
+       value: 98.97
+   - task:
+       type: text-generation
+     dataset:
+       name: LaRoSeDa_multiclass_finetuned
+       type: LaRoSeDa_multiclass_finetuned
+     metrics:
+     - name: Average macro-f1
+       type: macro-f1
+       value: 87.72
+   - task:
+       type: text-generation
+     dataset:
+       name: WMT_EN-RO
+       type: WMT_EN-RO
+     metrics:
+     - name: Average bleu
+       type: bleu
+       value: 10.01
+   - task:
+       type: text-generation
+     dataset:
+       name: WMT_RO-EN
+       type: WMT_RO-EN
+     metrics:
+     - name: Average bleu
+       type: bleu
+       value: 13.03
+   - task:
+       type: text-generation
+     dataset:
+       name: WMT_EN-RO_finetuned
+       type: WMT_EN-RO_finetuned
+     metrics:
+     - name: Average bleu
+       type: bleu
+       value: 27.85
+   - task:
+       type: text-generation
+     dataset:
+       name: WMT_RO-EN_finetuned
+       type: WMT_RO-EN_finetuned
+     metrics:
+     - name: Average bleu
+       type: bleu
+       value: 39.30
+   - task:
+       type: text-generation
+     dataset:
+       name: XQuAD
+       type: XQuAD
+     metrics:
+     - name: Average exact_match
+       type: exact_match
+       value: 30.15
+   - task:
+       type: text-generation
+     dataset:
+       name: XQuAD
+       type: XQuAD
+     metrics:
+     - name: Average f1
+       type: f1
+       value: 47.03
+   - task:
+       type: text-generation
+     dataset:
+       name: XQuAD_finetuned
+       type: XQuAD_finetuned
+     metrics:
+     - name: Average exact_match
+       type: exact_match
+       value: 67.06
+   - task:
+       type: text-generation
+     dataset:
+       name: XQuAD_finetuned
+       type: XQuAD_finetuned
+     metrics:
+     - name: Average f1
+       type: f1
+       value: 79.96
+   - task:
+       type: text-generation
+     dataset:
+       name: STS
+       type: STS
+     metrics:
+     - name: Average spearman
+       type: spearman
+       value: 7.89
+   - task:
+       type: text-generation
+     dataset:
+       name: STS
+       type: STS
+     metrics:
+     - name: Average pearson
+       type: pearson
+       value: 7.98
+   - task:
+       type: text-generation
+     dataset:
+       name: STS_finetuned
+       type: STS_finetuned
+     metrics:
+     - name: Average spearman
+       type: spearman
+       value: 71.75
+   - task:
+       type: text-generation
+     dataset:
+       name: STS_finetuned
+       type: STS_finetuned
+     metrics:
+     - name: Average pearson
+       type: pearson
+       value: 71.99
+   - task:
+       type: text-generation
+     dataset:
+       name: OpenLLM-Ro/ro_arc_challenge
+       type: OpenLLM-Ro/ro_arc_challenge
+     metrics:
+     - name: 0-shot
+       type: accuracy
+       value: 35.56
+     - name: 1-shot
+       type: accuracy
+       value: 36.42
+     - name: 3-shot
+       type: accuracy
+       value: 38.56
+     - name: 5-shot
+       type: accuracy
+       value: 38.39
+     - name: 10-shot
+       type: accuracy
+       value: 39.07
+     - name: 25-shot
+       type: accuracy
+       value: 39.67
+   - task:
+       type: text-generation
+     dataset:
+       name: OpenLLM-Ro/ro_mmlu
+       type: OpenLLM-Ro/ro_mmlu
+     metrics:
+     - name: 0-shot
+       type: accuracy
+       value: 25.82
+     - name: 1-shot
+       type: accuracy
+       value: 25.48
+     - name: 3-shot
+       type: accuracy
+       value: 27.61
+     - name: 5-shot
+       type: accuracy
+       value: 29.96
+   - task:
+       type: text-generation
+     dataset:
+       name: OpenLLM-Ro/ro_winogrande
+       type: OpenLLM-Ro/ro_winogrande
+     metrics:
+     - name: 0-shot
+       type: accuracy
+       value: 58.72
+     - name: 1-shot
+       type: accuracy
+       value: 58.88
+     - name: 3-shot
+       type: accuracy
+       value: 60.38
+     - name: 5-shot
+       type: accuracy
+       value: 59.19
+   - task:
+       type: text-generation
+     dataset:
+       name: OpenLLM-Ro/ro_hellaswag
+       type: OpenLLM-Ro/ro_hellaswag
+     metrics:
+     - name: 0-shot
+       type: accuracy
+       value: 55.85
+     - name: 1-shot
+       type: accuracy
+       value: 57.06
+     - name: 3-shot
+       type: accuracy
+       value: 57.52
+     - name: 5-shot
+       type: accuracy
+       value: 57.89
+     - name: 10-shot
+       type: accuracy
+       value: 57.79
+   - task:
+       type: text-generation
+     dataset:
+       name: OpenLLM-Ro/ro_gsm8k
+       type: OpenLLM-Ro/ro_gsm8k
+     metrics:
+     - name: 0-shot
+       type: accuracy
+       value: 0.00
+     - name: 1-shot
+       type: accuracy
+       value: 2.96
+     - name: 3-shot
+       type: accuracy
+       value: 4.62
+   - task:
+       type: text-generation
+     dataset:
+       name: LaRoSeDa_binary
+       type: LaRoSeDa_binary
+     metrics:
+     - name: 0-shot
+       type: macro-f1
+       value: 42.78
+     - name: 1-shot
+       type: macro-f1
+       value: 98.00
+     - name: 3-shot
+       type: macro-f1
+       value: 95.13
+     - name: 5-shot
+       type: macro-f1
+       value: 97.07
+   - task:
+       type: text-generation
+     dataset:
+       name: LaRoSeDa_multiclass
+       type: LaRoSeDa_multiclass
+     metrics:
+     - name: 0-shot
+       type: macro-f1
+       value: 46.41
+     - name: 1-shot
+       type: macro-f1
+       value: 67.36
+     - name: 3-shot
+       type: macro-f1
+       value: 65.16
+     - name: 5-shot
+       type: macro-f1
+       value: 65.23
+   - task:
+       type: text-generation
+     dataset:
+       name: WMT_EN-RO
+       type: WMT_EN-RO
+     metrics:
+     - name: 0-shot
+       type: bleu
+       value: 4.45
+     - name: 1-shot
+       type: bleu
+       value: 8.61
+     - name: 3-shot
+       type: bleu
+       value: 12.25
+     - name: 5-shot
+       type: bleu
+       value: 14.73
+   - task:
+       type: text-generation
+     dataset:
+       name: WMT_RO-EN
+       type: WMT_RO-EN
+     metrics:
+     - name: 0-shot
+       type: bleu
+       value: 1.29
+     - name: 1-shot
+       type: bleu
+       value: 10.78
+     - name: 3-shot
+       type: bleu
+       value: 16.82
+     - name: 5-shot
+       type: bleu
+       value: 23.24
+   - task:
+       type: text-generation
+     dataset:
+       name: XQuAD_EM
+       type: XQuAD_EM
+     metrics:
+     - name: 0-shot
+       type: exact_match
+       value: 5.29
+     - name: 1-shot
+       type: exact_match
+       value: 33.95
+     - name: 3-shot
+       type: exact_match
+       value: 39.24
+     - name: 5-shot
+       type: exact_match
+       value: 42.10
+   - task:
+       type: text-generation
+     dataset:
+       name: XQuAD_F1
+       type: XQuAD_F1
+     metrics:
+     - name: 0-shot
+       type: f1
+       value: 16.17
+     - name: 1-shot
+       type: f1
+       value: 51.84
+     - name: 3-shot
+       type: f1
+       value: 58.82
+     - name: 5-shot
+       type: f1
+       value: 61.29
+   - task:
+       type: text-generation
+     dataset:
+       name: STS_Spearman
+       type: STS_Spearman
+     metrics:
+     - name: 1-shot
+       type: spearman
+       value: -1.74
+     - name: 3-shot
+       type: spearman
+       value: 15.47
+     - name: 5-shot
+       type: spearman
+       value: 9.93
+   - task:
+       type: text-generation
+     dataset:
+       name: STS_Pearson
+       type: STS_Pearson
+     metrics:
+     - name: 1-shot
+       type: pearson
+       value: -1.40
+     - name: 3-shot
+       type: pearson
+       value: 15.00
+     - name: 5-shot
+       type: pearson
+       value: 10.33
+
  ---
 
  # Model Card for Model ID
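
The `model-index` block moved below `datasets:` in this commit follows the Hugging Face model-index schema (one entry per model, each with a list of `results`, and each result carrying a `task`, a `dataset`, and its `metrics`), so the reported scores can be read back programmatically. Below is a minimal sketch, assuming a local copy of this README.md and PyYAML installed; the file path is illustrative:

```python
# Minimal sketch: parse the YAML front matter of this model card and list the
# evaluation results declared in its `model-index` block.
# Assumes a local copy of README.md; requires PyYAML (pip install pyyaml).
import yaml

with open("README.md", encoding="utf-8") as f:
    text = f.read()

# The front matter sits between the first two `---` fences of the file.
_, front_matter, _ = text.split("---", 2)
meta = yaml.safe_load(front_matter)

for entry in meta["model-index"]:
    print(entry["name"])
    for result in entry["results"]:
        dataset = result["dataset"]["name"]
        for metric in result["metrics"]:
            print(f"  {dataset} | {metric['name']} ({metric['type']}): {metric['value']}")
```

Run against the updated README, this prints one line per metric above, e.g. `Romanian_Academic_Benchmarks | Average accuracy (accuracy): 38.03`.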