task,metric,value,err,version anli_r1,acc,0.33,0.014876872027456738,0 anli_r2,acc,0.344,0.015029633724408943,0 anli_r3,acc,0.3616666666666667,0.013876131663123877,0 arc_challenge,acc,0.3250853242320819,0.013688147309729119,0 arc_challenge,acc_norm,0.34215017064846415,0.013864152159177275,0 arc_easy,acc,0.6654040404040404,0.009682137724327907,0 arc_easy,acc_norm,0.5997474747474747,0.010053550119896138,0 boolq,acc,0.6351681957186545,0.008419440984963646,1 cb,acc,0.25,0.058387420812114225,1 cb,f1,0.1693693693693694,,1 copa,acc,0.83,0.03775251680686371,0 hellaswag,acc,0.5343557060346544,0.004977988452502641,0 hellaswag,acc_norm,0.7091216889065923,0.004532393111248685,0 piqa,acc,0.7676822633297062,0.009853201384168241,0 piqa,acc_norm,0.7823721436343852,0.00962740747484087,0 rte,acc,0.5667870036101083,0.02982676408213827,0 sciq,acc,0.894,0.009739551265785138,0 sciq,acc_norm,0.822,0.012102167676183589,0 storycloze_2016,acc,0.7573490112239444,0.009913300265342059,0 winogrande,acc,0.6250986582478295,0.013605544523788008,0