|
task,metric,value,err,version
|
|
anli_r1,acc,0.316,0.014709193056057107,0
|
|
anli_r2,acc,0.332,0.014899597242811478,0
|
|
anli_r3,acc,0.34,0.013680495725767803,0
|
|
arc_challenge,acc,0.34897610921501704,0.013928933461382497,0
|
|
arc_challenge,acc_norm,0.36860068259385664,0.014097810678042184,0
|
|
arc_easy,acc,0.6792929292929293,0.00957747457110883,0
|
|
arc_easy,acc_norm,0.6670875420875421,0.009669958978395326,0
|
|
boolq,acc,0.6516819571865443,0.008332942286688303,1
|
|
cb,acc,0.30357142857142855,0.06199938655510754,1
|
|
cb,f1,0.2236842105263158,,1
|
|
copa,acc,0.84,0.03684529491774709,0
|
|
hellaswag,acc,0.5338577972515435,0.004978328190775526,0
|
|
hellaswag,acc_norm,0.7099183429595698,0.004528723951878254,0
|
|
piqa,acc,0.7687704026115343,0.00983706318062533,0
|
|
piqa,acc_norm,0.7829162132752993,0.009618708415756785,0
|
|
rte,acc,0.5415162454873647,0.029992535385373314,0
|
|
sciq,acc,0.934,0.007855297938697587,0
|
|
sciq,acc_norm,0.925,0.008333333333333326,0
|
|
storycloze_2016,acc,0.7600213789417424,0.009875938525582594,0
|
|
winogrande,acc,0.6369376479873717,0.013515191866479221,0
|
|
|