task,metric,value,err,version anli_r1,acc,0.317,0.014721675438880227,0 anli_r2,acc,0.34,0.014987482264363935,0 anli_r3,acc,0.34,0.01368049572576779,0 arc_challenge,acc,0.3455631399317406,0.013896938461145683,0 arc_challenge,acc_norm,0.37372013651877134,0.014137708601759095,0 arc_easy,acc,0.6864478114478114,0.009519779157242258,0 arc_easy,acc_norm,0.6696127946127947,0.009651430216428182,0 boolq,acc,0.6571865443425077,0.008301676410578645,1 cb,acc,0.39285714285714285,0.0658538889806635,1 cb,f1,0.3421735552883094,,1 copa,acc,0.83,0.03775251680686371,0 hellaswag,acc,0.5375423222465644,0.004975696076240845,0 hellaswag,acc_norm,0.7132045409281019,0.004513409114983847,0 piqa,acc,0.7742110990206746,0.00975498067091731,0 piqa,acc_norm,0.7840043525571273,0.009601236303553543,0 rte,acc,0.5342960288808665,0.03002557981936643,0 sciq,acc,0.933,0.007910345983177549,0 sciq,acc_norm,0.93,0.0080724943583235,0 storycloze_2016,acc,0.7669695350080171,0.00977630189854803,0 winogrande,acc,0.6448303078137332,0.013450047479569256,0