task,metric,value,err,version anli_r1,acc,0.322,0.014782913600996674,0 anli_r2,acc,0.332,0.014899597242811483,0 anli_r3,acc,0.3433333333333333,0.01371263383046586,0 arc_challenge,acc,0.3293515358361775,0.013734057652635473,0 arc_challenge,acc_norm,0.3515358361774744,0.013952413699600943,0 arc_easy,acc,0.6654040404040404,0.009682137724327909,0 arc_easy,acc_norm,0.6595117845117845,0.009723676813825861,0 boolq,acc,0.6461773700305811,0.008362983020904467,1 cb,acc,0.39285714285714285,0.0658538889806635,1 cb,f1,0.32857142857142857,,1 copa,acc,0.82,0.038612291966536955,0 hellaswag,acc,0.5291774546903008,0.004981278326428018,0 hellaswag,acc_norm,0.7015534754033061,0.004566412808642458,0 piqa,acc,0.7818280739934712,0.009636081958374381,0 piqa,acc_norm,0.7905331882480957,0.009494302979819806,0 rte,acc,0.6064981949458483,0.029405839314203198,0 sciq,acc,0.932,0.007964887911291605,0 sciq,acc_norm,0.918,0.008680515615523732,0 storycloze_2016,acc,0.7589524318546232,0.00989094649057694,0 winogrande,acc,0.6416732438831886,0.01347658117256753,0