|
task,metric,value,err,version
|
|
anli_r1,acc,0.328,0.014853842487270334,0
|
|
anli_r2,acc,0.321,0.014770821817934649,0
|
|
anli_r3,acc,0.3491666666666667,0.013767075395077249,0
|
|
arc_challenge,acc,0.3250853242320819,0.013688147309729124,0
|
|
arc_challenge,acc_norm,0.35665529010238906,0.013998056902620199,0
|
|
arc_easy,acc,0.6742424242424242,0.009616642976885964,0
|
|
arc_easy,acc_norm,0.6405723905723906,0.009845958893373752,0
|
|
boolq,acc,0.6489296636085627,0.00834811495726361,1
|
|
cb,acc,0.4642857142857143,0.06724777654937658,1
|
|
cb,f1,0.3013448230839535,,1
|
|
copa,acc,0.79,0.040936018074033256,0
|
|
hellaswag,acc,0.5251941844254132,0.00498344288867777,0
|
|
hellaswag,acc_norm,0.6989643497311293,0.0045777070250313644,0
|
|
piqa,acc,0.7747551686615887,0.009746643471032145,0
|
|
piqa,acc_norm,0.779651795429815,0.00967053545685313,0
|
|
rte,acc,0.5342960288808665,0.030025579819366426,0
|
|
sciq,acc,0.916,0.008776162089491127,0
|
|
sciq,acc_norm,0.897,0.009616833339695792,0
|
|
storycloze_2016,acc,0.7504008551576697,0.010008002459430844,0
|
|
winogrande,acc,0.6108918705603789,0.013702520871485945,0
|
|
|