Safetensors
Romanian
llama
Eval Results
mihaimasala committed
Commit 1837e89
1 Parent(s): c85bb73

Update README.md

Files changed (1)
  1. README.md +474 -473
README.md CHANGED
@@ -7,479 +7,480 @@ base_model:
  datasets:
  - OpenLLM-Ro/ro_dpo_helpsteer
  model-index:
- - name: OpenLLM-Ro/RoLlama2-7b-Instruct-DPO-2024-10-09
-   results:
-   - task:
-       type: text-generation
-     dataset:
-       name: RoMT-Bench
-       type: RoMT-Bench
-     metrics:
-     - name: Score
-       type: Score
-       value: 4.61
-   - task:
-       type: text-generation
-     dataset:
-       name: RoCulturaBench
-       type: RoCulturaBench
-     metrics:
-     - name: Score
-       type: Score
-       value: 4.8
-   - task:
-       type: text-generation
-     dataset:
-       name: Romanian_Academic_Benchmarks
-       type: Romanian_Academic_Benchmarks
-     metrics:
-     - name: Average accuracy
-       type: accuracy
-       value: 43.2
-   - task:
-       type: text-generation
-     dataset:
-       name: OpenLLM-Ro/ro_arc_challenge
-       type: OpenLLM-Ro/ro_arc_challenge
-     metrics:
-     - name: Average accuracy
-       type: accuracy
-       value: 44.24
-   - task:
-       type: text-generation
-     dataset:
-       name: OpenLLM-Ro/ro_mmlu
-       type: OpenLLM-Ro/ro_mmlu
-     metrics:
-     - name: Average accuracy
-       type: accuracy
-       value: 38.39
-   - task:
-       type: text-generation
-     dataset:
-       name: OpenLLM-Ro/ro_winogrande
-       type: OpenLLM-Ro/ro_winogrande
-     metrics:
-     - name: Average accuracy
-       type: accuracy
-       value: 62.57
-   - task:
-       type: text-generation
-     dataset:
-       name: OpenLLM-Ro/ro_hellaswag
-       type: OpenLLM-Ro/ro_hellaswag
-     metrics:
-     - name: Average accuracy
-       type: accuracy
-       value: 59.2
-   - task:
-       type: text-generation
-     dataset:
-       name: OpenLLM-Ro/ro_gsm8k
-       type: OpenLLM-Ro/ro_gsm8k
-     metrics:
-     - name: Average accuracy
-       type: accuracy
-       value: 15.72
-   - task:
-       type: text-generation
-     dataset:
-       name: OpenLLM-Ro/ro_truthfulqa
-       type: OpenLLM-Ro/ro_truthfulqa
-     metrics:
-     - name: Average accuracy
-       type: accuracy
-       value: 39.07
-   - task:
-       type: text-generation
-     dataset:
-       name: LaRoSeDa_binary
-       type: LaRoSeDa_binary
-     metrics:
-     - name: Average macro-f1
-       type: macro-f1
-       value: 97.31
-   - task:
-       type: text-generation
-     dataset:
-       name: LaRoSeDa_multiclass
-       type: LaRoSeDa_multiclass
-     metrics:
-     - name: Average macro-f1
-       type: macro-f1
-       value: 60.56
-   - task:
-       type: text-generation
-     dataset:
-       name: LaRoSeDa_binary_finetuned
-       type: LaRoSeDa_binary_finetuned
-     metrics:
-     - name: Average macro-f1
-       type: macro-f1
-       value: 0
-   - task:
-       type: text-generation
-     dataset:
-       name: LaRoSeDa_multiclass_finetuned
-       type: LaRoSeDa_multiclass_finetuned
-     metrics:
-     - name: Average macro-f1
-       type: macro-f1
-       value: 0
-   - task:
-       type: text-generation
-     dataset:
-       name: WMT_EN-RO
-       type: WMT_EN-RO
-     metrics:
-     - name: Average bleu
-       type: bleu
-       value: 26.56
-   - task:
-       type: text-generation
-     dataset:
-       name: WMT_RO-EN
-       type: WMT_RO-EN
-     metrics:
-     - name: Average bleu
-       type: bleu
-       value: 21.68
-   - task:
-       type: text-generation
-     dataset:
-       name: WMT_EN-RO_finetuned
-       type: WMT_EN-RO_finetuned
-     metrics:
-     - name: Average bleu
-       type: bleu
-       value: 0
-   - task:
-       type: text-generation
-     dataset:
-       name: WMT_RO-EN_finetuned
-       type: WMT_RO-EN_finetuned
-     metrics:
-     - name: Average bleu
-       type: bleu
-       value: 0
-   - task:
-       type: text-generation
-     dataset:
-       name: XQuAD
-       type: XQuAD
-     metrics:
-     - name: Average exact_match
-       type: exact_match
-       value: 35.78
-   - task:
-       type: text-generation
-     dataset:
-       name: XQuAD
-       type: XQuAD
-     metrics:
-     - name: Average f1
-       type: f1
-       value: 59.31
-   - task:
-       type: text-generation
-     dataset:
-       name: XQuAD_finetuned
-       type: XQuAD_finetuned
-     metrics:
-     - name: Average exact_match
-       type: exact_match
-       value: 0
-   - task:
-       type: text-generation
-     dataset:
-       name: XQuAD_finetuned
-       type: XQuAD_finetuned
-     metrics:
-     - name: Average f1
-       type: f1
-       value: 0
-   - task:
-       type: text-generation
-     dataset:
-       name: STS
-       type: STS
-     metrics:
-     - name: Average spearman
-       type: spearman
-       value: 61.22
-   - task:
-       type: text-generation
-     dataset:
-       name: STS
-       type: STS
-     metrics:
-     - name: Average pearson
-       type: pearson
-       value: 58.41
-   - task:
-       type: text-generation
-     dataset:
-       name: STS_finetuned
-       type: STS_finetuned
-     metrics:
-     - name: Average spearman
-       type: spearman
-       value: 0
-   - task:
-       type: text-generation
-     dataset:
-       name: STS_finetuned
-       type: STS_finetuned
-     metrics:
-     - name: Average pearson
-       type: pearson
-       value: 0
-   - task:
-       type: text-generation
-     dataset:
-       name: RoMT-Bench
-       type: RoMT-Bench
-     metrics:
-     - name: First turn
-       type: Score
-       value: 5.15
-     - name: Second turn
-       type: Score
-       value: 4.06
-   - task:
-       type: text-generation
-     dataset:
-       name: OpenLLM-Ro/ro_arc_challenge
-       type: OpenLLM-Ro/ro_arc_challenge
-     metrics:
-     - name: 0-shot
-       type: accuracy
-       value: 42.67
-     - name: 1-shot
-       type: accuracy
-       value: 43.36
-     - name: 3-shot
-       type: accuracy
-       value: 44.13
-     - name: 5-shot
-       type: accuracy
-       value: 44.3
-     - name: 10-shot
-       type: accuracy
-       value: 45.67
-     - name: 25-shot
-       type: accuracy
-       value: 45.33
-   - task:
-       type: text-generation
-     dataset:
-       name: OpenLLM-Ro/ro_mmlu
-       type: OpenLLM-Ro/ro_mmlu
-     metrics:
-     - name: 0-shot
-       type: accuracy
-       value: 36.62
-     - name: 1-shot
-       type: accuracy
-       value: 38.04
-     - name: 3-shot
-       type: accuracy
-       value: 39.52
-     - name: 5-shot
-       type: accuracy
-       value: 39.36
-   - task:
-       type: text-generation
-     dataset:
-       name: OpenLLM-Ro/ro_winogrande
-       type: OpenLLM-Ro/ro_winogrande
-     metrics:
-     - name: 0-shot
-       type: accuracy
-       value: 61.72
-     - name: 1-shot
-       type: accuracy
-       value: 62.04
-     - name: 3-shot
-       type: accuracy
-       value: 63.85
-     - name: 5-shot
-       type: accuracy
-       value: 62.67
-   - task:
-       type: text-generation
-     dataset:
-       name: OpenLLM-Ro/ro_hellaswag
-       type: OpenLLM-Ro/ro_hellaswag
-     metrics:
-     - name: 0-shot
-       type: accuracy
-       value: 58.75
-     - name: 1-shot
-       type: accuracy
-       value: 58.29
-     - name: 3-shot
-       type: accuracy
-       value: 59.28
-     - name: 5-shot
-       type: accuracy
-       value: 59.68
-     - name: 10-shot
-       type: accuracy
-       value: 60.01
-   - task:
-       type: text-generation
-     dataset:
-       name: OpenLLM-Ro/ro_gsm8k
-       type: OpenLLM-Ro/ro_gsm8k
-     metrics:
-     - name: 0-shot
-       type: accuracy
-       value: 11.14
-     - name: 1-shot
-       type: accuracy
-       value: 17.97
-     - name: 3-shot
-       type: accuracy
-       value: 18.04
-   - task:
-       type: text-generation
-     dataset:
-       name: LaRoSeDa_binary
-       type: LaRoSeDa_binary
-     metrics:
-     - name: 0-shot
-       type: macro-f1
-       value: 98.03
-     - name: 1-shot
-       type: macro-f1
-       value: 95.96
-     - name: 3-shot
-       type: macro-f1
-       value: 97.33
-     - name: 5-shot
-       type: macro-f1
-       value: 97.9
-   - task:
-       type: text-generation
-     dataset:
-       name: LaRoSeDa_multiclass
-       type: LaRoSeDa_multiclass
-     metrics:
-     - name: 0-shot
-       type: macro-f1
-       value: 60.67
-     - name: 1-shot
-       type: macro-f1
-       value: 51.37
-     - name: 3-shot
-       type: macro-f1
-       value: 62.49
-     - name: 5-shot
-       type: macro-f1
-       value: 67.7
-   - task:
-       type: text-generation
-     dataset:
-       name: WMT_EN-RO
-       type: WMT_EN-RO
-     metrics:
-     - name: 0-shot
-       type: bleu
-       value: 19.83
-     - name: 1-shot
-       type: bleu
-       value: 29.04
-     - name: 3-shot
-       type: bleu
-       value: 28.9
-     - name: 5-shot
-       type: bleu
-       value: 28.47
-   - task:
-       type: text-generation
-     dataset:
-       name: WMT_RO-EN
-       type: WMT_RO-EN
-     metrics:
-     - name: 0-shot
-       type: bleu
-       value: 1.74
-     - name: 1-shot
-       type: bleu
-       value: 15.28
-     - name: 3-shot
-       type: bleu
-       value: 34.13
-     - name: 5-shot
-       type: bleu
-       value: 35.56
-   - task:
-       type: text-generation
-     dataset:
-       name: XQuAD_EM
-       type: XQuAD_EM
-     metrics:
-     - name: 0-shot
-       type: exact_match
-       value: 26.97
-     - name: 1-shot
-       type: exact_match
-       value: 36.3
-     - name: 3-shot
-       type: exact_match
-       value: 40.25
-     - name: 5-shot
-       type: exact_match
-       value: 39.58
-   - task:
-       type: text-generation
-     dataset:
-       name: XQuAD_F1
-       type: XQuAD_F1
-     metrics:
-     - name: 0-shot
-       type: f1
-       value: 52.9
-     - name: 1-shot
-       type: f1
-       value: 60.05
-     - name: 3-shot
-       type: f1
-       value: 62.08
-     - name: 5-shot
-       type: f1
-       value: 62.22
-   - task:
-       type: text-generation
-     dataset:
-       name: STS
-       type: STS
-     metrics:
-     - name: 0-shot
-       type: spearman
-       value: 62.07
-     - name: 1-shot
-       type: spearman
-       value: 59.47
-     - name: 3-shot
-       type: spearman
-       value: 62.12
-   - task:
-       type: text-generation
-     dataset:
-       name: STS
-       type: STS
-     metrics:
-     - name: 0-shot
-       type: pearson
-       value: 60.6
-     - name: 1-shot
-       type: pearson
-       value: 56.44
-     - name: 3-shot
-       type: pearson
-       value: 58.18
+ - name: OpenLLM-Ro/RoLlama2-7b-Instruct-DPO-2024-10-09
+   results:
+   - task:
+       type: text-generation
+     dataset:
+       name: RoMT-Bench
+       type: RoMT-Bench
+     metrics:
+     - name: Score
+       type: Score
+       value: 4.61
+   - task:
+       type: text-generation
+     dataset:
+       name: RoCulturaBench
+       type: RoCulturaBench
+     metrics:
+     - name: Score
+       type: Score
+       value: 4.80
+   - task:
+       type: text-generation
+     dataset:
+       name: Romanian_Academic_Benchmarks
+       type: Romanian_Academic_Benchmarks
+     metrics:
+     - name: Average accuracy
+       type: accuracy
+       value: 43.20
+   - task:
+       type: text-generation
+     dataset:
+       name: OpenLLM-Ro/ro_arc_challenge
+       type: OpenLLM-Ro/ro_arc_challenge
+     metrics:
+     - name: Average accuracy
+       type: accuracy
+       value: 44.24
+   - task:
+       type: text-generation
+     dataset:
+       name: OpenLLM-Ro/ro_mmlu
+       type: OpenLLM-Ro/ro_mmlu
+     metrics:
+     - name: Average accuracy
+       type: accuracy
+       value: 38.39
+   - task:
+       type: text-generation
+     dataset:
+       name: OpenLLM-Ro/ro_winogrande
+       type: OpenLLM-Ro/ro_winogrande
+     metrics:
+     - name: Average accuracy
+       type: accuracy
+       value: 62.57
+   - task:
+       type: text-generation
+     dataset:
+       name: OpenLLM-Ro/ro_hellaswag
+       type: OpenLLM-Ro/ro_hellaswag
+     metrics:
+     - name: Average accuracy
+       type: accuracy
+       value: 59.20
+   - task:
+       type: text-generation
+     dataset:
+       name: OpenLLM-Ro/ro_gsm8k
+       type: OpenLLM-Ro/ro_gsm8k
+     metrics:
+     - name: Average accuracy
+       type: accuracy
+       value: 15.72
+   - task:
+       type: text-generation
+     dataset:
+       name: OpenLLM-Ro/ro_truthfulqa
+       type: OpenLLM-Ro/ro_truthfulqa
+     metrics:
+     - name: Average accuracy
+       type: accuracy
+       value: 39.07
+   - task:
+       type: text-generation
+     dataset:
+       name: LaRoSeDa_binary
+       type: LaRoSeDa_binary
+     metrics:
+     - name: Average macro-f1
+       type: macro-f1
+       value: 97.31
+   - task:
+       type: text-generation
+     dataset:
+       name: LaRoSeDa_multiclass
+       type: LaRoSeDa_multiclass
+     metrics:
+     - name: Average macro-f1
+       type: macro-f1
+       value: 60.56
+   - task:
+       type: text-generation
+     dataset:
+       name: LaRoSeDa_binary_finetuned
+       type: LaRoSeDa_binary_finetuned
+     metrics:
+     - name: Average macro-f1
+       type: macro-f1
+       value: 0.00
+   - task:
+       type: text-generation
+     dataset:
+       name: LaRoSeDa_multiclass_finetuned
+       type: LaRoSeDa_multiclass_finetuned
+     metrics:
+     - name: Average macro-f1
+       type: macro-f1
+       value: 0.00
+   - task:
+       type: text-generation
+     dataset:
+       name: WMT_EN-RO
+       type: WMT_EN-RO
+     metrics:
+     - name: Average bleu
+       type: bleu
+       value: 26.56
+   - task:
+       type: text-generation
+     dataset:
+       name: WMT_RO-EN
+       type: WMT_RO-EN
+     metrics:
+     - name: Average bleu
+       type: bleu
+       value: 21.68
+   - task:
+       type: text-generation
+     dataset:
+       name: WMT_EN-RO_finetuned
+       type: WMT_EN-RO_finetuned
+     metrics:
+     - name: Average bleu
+       type: bleu
+       value: 0.00
+   - task:
+       type: text-generation
+     dataset:
+       name: WMT_RO-EN_finetuned
+       type: WMT_RO-EN_finetuned
+     metrics:
+     - name: Average bleu
+       type: bleu
+       value: 0.00
+   - task:
+       type: text-generation
+     dataset:
+       name: XQuAD
+       type: XQuAD
+     metrics:
+     - name: Average exact_match
+       type: exact_match
+       value: 35.78
+   - task:
+       type: text-generation
+     dataset:
+       name: XQuAD
+       type: XQuAD
+     metrics:
+     - name: Average f1
+       type: f1
+       value: 59.31
+   - task:
+       type: text-generation
+     dataset:
+       name: XQuAD_finetuned
+       type: XQuAD_finetuned
+     metrics:
+     - name: Average exact_match
+       type: exact_match
+       value: 0.00
+   - task:
+       type: text-generation
+     dataset:
+       name: XQuAD_finetuned
+       type: XQuAD_finetuned
+     metrics:
+     - name: Average f1
+       type: f1
+       value: 0.00
+   - task:
+       type: text-generation
+     dataset:
+       name: STS
+       type: STS
+     metrics:
+     - name: Average spearman
+       type: spearman
+       value: 61.22
+   - task:
+       type: text-generation
+     dataset:
+       name: STS
+       type: STS
+     metrics:
+     - name: Average pearson
+       type: pearson
+       value: 58.41
+   - task:
+       type: text-generation
+     dataset:
+       name: STS_finetuned
+       type: STS_finetuned
+     metrics:
+     - name: Average spearman
+       type: spearman
+       value: 0.00
+   - task:
+       type: text-generation
+     dataset:
+       name: STS_finetuned
+       type: STS_finetuned
+     metrics:
+     - name: Average pearson
+       type: pearson
+       value: 0.00
+   - task:
+       type: text-generation
+     dataset:
+       name: RoMT-Bench
+       type: RoMT-Bench
+     metrics:
+     - name: First turn
+       type: Score
+       value: 5.15
+     - name: Second turn
+       type: Score
+       value: 4.06
+   - task:
+       type: text-generation
+     dataset:
+       name: OpenLLM-Ro/ro_arc_challenge
+       type: OpenLLM-Ro/ro_arc_challenge
+     metrics:
+     - name: 0-shot
+       type: accuracy
+       value: 42.67
+     - name: 1-shot
+       type: accuracy
+       value: 43.36
+     - name: 3-shot
+       type: accuracy
+       value: 44.13
+     - name: 5-shot
+       type: accuracy
+       value: 44.30
+     - name: 10-shot
+       type: accuracy
+       value: 45.67
+     - name: 25-shot
+       type: accuracy
+       value: 45.33
+   - task:
+       type: text-generation
+     dataset:
+       name: OpenLLM-Ro/ro_mmlu
+       type: OpenLLM-Ro/ro_mmlu
+     metrics:
+     - name: 0-shot
+       type: accuracy
+       value: 36.62
+     - name: 1-shot
+       type: accuracy
+       value: 38.04
+     - name: 3-shot
+       type: accuracy
+       value: 39.52
+     - name: 5-shot
+       type: accuracy
+       value: 39.36
+   - task:
+       type: text-generation
+     dataset:
+       name: OpenLLM-Ro/ro_winogrande
+       type: OpenLLM-Ro/ro_winogrande
+     metrics:
+     - name: 0-shot
+       type: accuracy
+       value: 61.72
+     - name: 1-shot
+       type: accuracy
+       value: 62.04
+     - name: 3-shot
+       type: accuracy
+       value: 63.85
+     - name: 5-shot
+       type: accuracy
+       value: 62.67
+   - task:
+       type: text-generation
+     dataset:
+       name: OpenLLM-Ro/ro_hellaswag
+       type: OpenLLM-Ro/ro_hellaswag
+     metrics:
+     - name: 0-shot
+       type: accuracy
+       value: 58.75
+     - name: 1-shot
+       type: accuracy
+       value: 58.29
+     - name: 3-shot
+       type: accuracy
+       value: 59.28
+     - name: 5-shot
+       type: accuracy
+       value: 59.68
+     - name: 10-shot
+       type: accuracy
+       value: 60.01
+   - task:
+       type: text-generation
+     dataset:
+       name: OpenLLM-Ro/ro_gsm8k
+       type: OpenLLM-Ro/ro_gsm8k
+     metrics:
+     - name: 0-shot
+       type: accuracy
+       value: 11.14
+     - name: 1-shot
+       type: accuracy
+       value: 17.97
+     - name: 3-shot
+       type: accuracy
+       value: 18.04
+   - task:
+       type: text-generation
+     dataset:
+       name: LaRoSeDa_binary
+       type: LaRoSeDa_binary
+     metrics:
+     - name: 0-shot
+       type: macro-f1
+       value: 98.03
+     - name: 1-shot
+       type: macro-f1
+       value: 95.96
+     - name: 3-shot
+       type: macro-f1
+       value: 97.33
+     - name: 5-shot
+       type: macro-f1
+       value: 97.90
+   - task:
+       type: text-generation
+     dataset:
+       name: LaRoSeDa_multiclass
+       type: LaRoSeDa_multiclass
+     metrics:
+     - name: 0-shot
+       type: macro-f1
+       value: 60.67
+     - name: 1-shot
+       type: macro-f1
+       value: 51.37
+     - name: 3-shot
+       type: macro-f1
+       value: 62.49
+     - name: 5-shot
+       type: macro-f1
+       value: 67.70
+   - task:
+       type: text-generation
+     dataset:
+       name: WMT_EN-RO
+       type: WMT_EN-RO
+     metrics:
+     - name: 0-shot
+       type: bleu
+       value: 19.83
+     - name: 1-shot
+       type: bleu
+       value: 29.04
+     - name: 3-shot
+       type: bleu
+       value: 28.90
+     - name: 5-shot
+       type: bleu
+       value: 28.47
+   - task:
+       type: text-generation
+     dataset:
+       name: WMT_RO-EN
+       type: WMT_RO-EN
+     metrics:
+     - name: 0-shot
+       type: bleu
+       value: 1.74
+     - name: 1-shot
+       type: bleu
+       value: 15.28
+     - name: 3-shot
+       type: bleu
+       value: 34.13
+     - name: 5-shot
+       type: bleu
+       value: 35.56
+   - task:
+       type: text-generation
+     dataset:
+       name: XQuAD_EM
+       type: XQuAD_EM
+     metrics:
+     - name: 0-shot
+       type: exact_match
+       value: 26.97
+     - name: 1-shot
+       type: exact_match
+       value: 36.30
+     - name: 3-shot
+       type: exact_match
+       value: 40.25
+     - name: 5-shot
+       type: exact_match
+       value: 39.58
+   - task:
+       type: text-generation
+     dataset:
+       name: XQuAD_F1
+       type: XQuAD_F1
+     metrics:
+     - name: 0-shot
+       type: f1
+       value: 52.90
+     - name: 1-shot
+       type: f1
+       value: 60.05
+     - name: 3-shot
+       type: f1
+       value: 62.08
+     - name: 5-shot
+       type: f1
+       value: 62.22
+   - task:
+       type: text-generation
+     dataset:
+       name: STS_Spearman
+       type: STS_Spearman
+     metrics:
+     - name: 1-shot
+       type: spearman
+       value: 62.07
+     - name: 3-shot
+       type: spearman
+       value: 59.47
+     - name: 5-shot
+       type: spearman
+       value: 62.12
+   - task:
+       type: text-generation
+     dataset:
+       name: STS_Pearson
+       type: STS_Pearson
+     metrics:
+     - name: 1-shot
+       type: pearson
+       value: 60.60
+     - name: 3-shot
+       type: pearson
+       value: 56.44
+     - name: 5-shot
+       type: pearson
+       value: 58.18
+
  ---

  # Model Card for Model ID
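
The model-index block edited above is plain YAML front matter, so the reported scores can be read back programmatically. A minimal sketch (not part of this commit; assumes PyYAML is installed and a local copy of the README whose front matter is the first `---`-delimited block):

```python
# Read the model-index metrics out of a model card's YAML front matter.
import yaml  # pip install pyyaml

with open("README.md", encoding="utf-8") as f:
    text = f.read()

# The front matter sits between the first two "---" delimiters.
_, front_matter, _ = text.split("---", 2)
meta = yaml.safe_load(front_matter)

for model in meta["model-index"]:
    print(model["name"])
    for result in model["results"]:
        dataset = result["dataset"]["name"]
        for metric in result["metrics"]:
            print(f"  {dataset}: {metric['name']} = {metric['value']}")
```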