task,metric,value,err,version anli_r1,acc,0.31,0.014632638658632902,0 anli_r2,acc,0.31,0.014632638658632905,0 anli_r3,acc,0.3283333333333333,0.013562032919529017,0 arc_challenge,acc,0.2883959044368601,0.013238394422428173,0 arc_challenge,acc_norm,0.3148464163822526,0.01357265770308495,0 arc_easy,acc,0.6262626262626263,0.009927267058259621,0 arc_easy,acc_norm,0.5934343434343434,0.010079056419223527,0 boolq,acc,0.5522935779816514,0.008697094687974059,1 cb,acc,0.3392857142857143,0.06384226561930825,1 cb,f1,0.29749748849204566,,1 copa,acc,0.79,0.040936018074033256,0 hellaswag,acc,0.4803823939454292,0.004985939292819582,0 hellaswag,acc_norm,0.6294562836088429,0.004819633668832538,0 piqa,acc,0.7486398258977149,0.010121156016819259,0 piqa,acc_norm,0.7633297062023939,0.009916841655042809,0 rte,acc,0.44765342960288806,0.02993107036293953,0 sciq,acc,0.892,0.0098200016513457,0 sciq,acc_norm,0.869,0.010674874844837954,0 storycloze_2016,acc,0.7049706039551042,0.010546232606962289,0 winogrande,acc,0.5887924230465666,0.013829128358676874,0