task,metric,value,err,version anli_r1,acc,0.357,0.015158521721486774,0 anli_r2,acc,0.334,0.014922019523732967,0 anli_r3,acc,0.37416666666666665,0.01397501560175897,0 arc_challenge,acc,0.2568259385665529,0.0127669237941168,0 arc_challenge,acc_norm,0.31399317406143346,0.013562691224726297,0 arc_easy,acc,0.5307239057239057,0.010240395584815237,0 arc_easy,acc_norm,0.5084175084175084,0.010258329515226462,0 boolq,acc,0.517125382262997,0.008739923994130054,1 cb,acc,0.39285714285714285,0.0658538889806635,1 cb,f1,0.26788664379209554,,1 copa,acc,0.75,0.04351941398892446,0 hellaswag,acc,0.45339573790081655,0.004968058944472161,0 hellaswag,acc_norm,0.5857398924517029,0.004915870966174404,0 piqa,acc,0.7399347116430903,0.010234893249061303,0 piqa,acc_norm,0.7328618063112078,0.01032344049261243,0 rte,acc,0.4657039711191336,0.030025579819366422,0 sciq,acc,0.799,0.012679107214617328,0 sciq,acc_norm,0.751,0.0136816002787023,0 storycloze_2016,acc,0.6878674505611972,0.010715220346279681,0 winogrande,acc,0.5509076558800315,0.01397945938914086,0