task,metric,value,err,version anli_r1,acc,0.334,0.01492201952373297,0 anli_r2,acc,0.336,0.01494414023379502,0 anli_r3,acc,0.33916666666666667,0.013672343491681819,0 arc_challenge,acc,0.33361774744027306,0.01377868705417654,0 arc_challenge,acc_norm,0.34897610921501704,0.013928933461382496,0 arc_easy,acc,0.6856060606060606,0.009526702423162905,0 arc_easy,acc_norm,0.6426767676767676,0.009833205612463107,0 boolq,acc,0.6351681957186545,0.00841944098496365,1 cb,acc,0.19642857142857142,0.05357142857142859,1 cb,f1,0.18920723969812325,,1 copa,acc,0.85,0.03588702812826373,0 hellaswag,acc,0.5288787094204341,0.004981451704451047,0 hellaswag,acc_norm,0.7052380003983271,0.0045500389685506236,0 piqa,acc,0.7627856365614799,0.009924694933586364,0 piqa,acc_norm,0.7742110990206746,0.009754980670917315,0 rte,acc,0.4729241877256318,0.030052303463143706,0 sciq,acc,0.922,0.008484573530118581,0 sciq,acc_norm,0.897,0.00961683333969579,0 storycloze_2016,acc,0.7402458578300374,0.010140244588689848,0 winogrande,acc,0.6172059984214681,0.013660946109442013,0