|
task,metric,value,err,version
|
|
anli_r1,acc,0.333,0.014910846164229863,0
|
|
anli_r2,acc,0.35,0.015090650341444231,0
|
|
anli_r3,acc,0.3491666666666667,0.013767075395077249,0
|
|
arc_challenge,acc,0.2636518771331058,0.012875929151297056,0
|
|
arc_challenge,acc_norm,0.30631399317406144,0.013470584417276513,0
|
|
arc_easy,acc,0.5361952861952862,0.010232865550346727,0
|
|
arc_easy,acc_norm,0.4823232323232323,0.010253369805698971,0
|
|
boolq,acc,0.5504587155963303,0.008700409761350798,1
|
|
cb,acc,0.48214285714285715,0.0673769750864465,1
|
|
cb,f1,0.3432539682539683,,1
|
|
copa,acc,0.75,0.04351941398892446,0
|
|
hellaswag,acc,0.45757817167894843,0.004971789638563322,0
|
|
hellaswag,acc_norm,0.5881298546106354,0.004911659884506154,0
|
|
piqa,acc,0.7334058759521219,0.010316749863541369,0
|
|
piqa,acc_norm,0.733949945593036,0.010310039263352827,0
|
|
rte,acc,0.48736462093862815,0.030086851767188564,0
|
|
sciq,acc,0.765,0.013414729030247116,0
|
|
sciq,acc_norm,0.68,0.014758652303574885,0
|
|
storycloze_2016,acc,0.6900053447354356,0.010695042806212553,0
|
|
winogrande,acc,0.5580110497237569,0.01395758407910899,0
|
|
|