|
task,metric,value,err,version
|
|
anli_r1,acc,0.331,0.014888272588203934,0
|
|
anli_r2,acc,0.352,0.015110404505648668,0
|
|
anli_r3,acc,0.35583333333333333,0.01382651874849331,0
|
|
arc_challenge,acc,0.32081911262798635,0.013640943091946526,0
|
|
arc_challenge,acc_norm,0.33447098976109213,0.013787460322441374,0
|
|
arc_easy,acc,0.6637205387205387,0.009694178072725206,0
|
|
arc_easy,acc_norm,0.5896464646464646,0.010093531255765452,0
|
|
boolq,acc,0.6284403669724771,0.008451598145076575,1
|
|
cb,acc,0.2857142857142857,0.06091449038731724,1
|
|
cb,f1,0.1717171717171717,,1
|
|
copa,acc,0.8,0.040201512610368445,0
|
|
hellaswag,acc,0.5319657438757219,0.004979573765575866,0
|
|
hellaswag,acc_norm,0.7045409281019717,0.004553164013379556,0
|
|
piqa,acc,0.7731229597388466,0.009771584259215172,0
|
|
piqa,acc_norm,0.7829162132752993,0.009618708415756788,0
|
|
rte,acc,0.5667870036101083,0.029826764082138277,0
|
|
sciq,acc,0.89,0.00989939381972444,0
|
|
sciq,acc_norm,0.815,0.012285191326386684,0
|
|
storycloze_2016,acc,0.7525387493319081,0.009979234591920141,0
|
|
winogrande,acc,0.6243093922651933,0.013611257508380437,0
|
|
|