|
task,metric,value,err,version
|
|
anli_r1,acc,0.33,0.014876872027456738,0
|
|
anli_r2,acc,0.344,0.015029633724408943,0
|
|
anli_r3,acc,0.3616666666666667,0.013876131663123877,0
|
|
arc_challenge,acc,0.3250853242320819,0.013688147309729119,0
|
|
arc_challenge,acc_norm,0.34215017064846415,0.013864152159177275,0
|
|
arc_easy,acc,0.6654040404040404,0.009682137724327907,0
|
|
arc_easy,acc_norm,0.5997474747474747,0.010053550119896138,0
|
|
boolq,acc,0.6351681957186545,0.008419440984963646,1
|
|
cb,acc,0.25,0.058387420812114225,1
|
|
cb,f1,0.1693693693693694,,1
|
|
copa,acc,0.83,0.03775251680686371,0
|
|
hellaswag,acc,0.5343557060346544,0.004977988452502641,0
|
|
hellaswag,acc_norm,0.7091216889065923,0.004532393111248685,0
|
|
piqa,acc,0.7676822633297062,0.009853201384168241,0
|
|
piqa,acc_norm,0.7823721436343852,0.00962740747484087,0
|
|
rte,acc,0.5667870036101083,0.02982676408213827,0
|
|
sciq,acc,0.894,0.009739551265785138,0
|
|
sciq,acc_norm,0.822,0.012102167676183589,0
|
|
storycloze_2016,acc,0.7573490112239444,0.009913300265342059,0
|
|
winogrande,acc,0.6250986582478295,0.013605544523788008,0
|
|
|