task,metric,value,err,version anli_r1,acc,0.335,0.014933117490932572,0 anli_r2,acc,0.344,0.015029633724408948,0 anli_r3,acc,0.36916666666666664,0.01393666834928527,0 arc_challenge,acc,0.26535836177474403,0.012902554762313967,0 arc_challenge,acc_norm,0.3037542662116041,0.01343890918477876,0 arc_easy,acc,0.5378787878787878,0.010230299628864799,0 arc_easy,acc_norm,0.5206228956228957,0.010251052755716122,0 boolq,acc,0.5165137614678899,0.008740284046486645,1 cb,acc,0.4642857142857143,0.06724777654937658,1 cb,f1,0.3255198942180551,,1 copa,acc,0.69,0.04648231987117316,0 hellaswag,acc,0.45518820952001593,0.004969701081068371,0 hellaswag,acc_norm,0.5893248356901015,0.004909509538525173,0 piqa,acc,0.719804134929271,0.01047812201557708,0 piqa,acc_norm,0.7274211099020674,0.010389256803296018,0 rte,acc,0.4729241877256318,0.030052303463143706,0 sciq,acc,0.806,0.012510816141264362,0 sciq,acc_norm,0.777,0.013169830843425672,0 storycloze_2016,acc,0.6964190272581507,0.010632901358518371,0 winogrande,acc,0.5438042620363063,0.01399845361092433,0