task,metric,value,err,version anli_r1,acc,0.352,0.015110404505648664,0 anli_r2,acc,0.354,0.015129868238451773,0 anli_r3,acc,0.3433333333333333,0.01371263383046586,0 arc_challenge,acc,0.35665529010238906,0.013998056902620199,0 arc_challenge,acc_norm,0.3677474402730375,0.014090995618168468,0 arc_easy,acc,0.6893939393939394,0.009495260551195608,0 arc_easy,acc_norm,0.6750841750841751,0.00961020360450482,0 boolq,acc,0.6614678899082569,0.008276502626477437,1 cb,acc,0.5,0.06741998624632421,1 cb,f1,0.3770850423844681,,1 copa,acc,0.84,0.0368452949177471,0 hellaswag,acc,0.530372435769767,0.004980566907790448,0 hellaswag,acc_norm,0.7117108145787692,0.00452040633108404,0 piqa,acc,0.7872687704026116,0.00954822312304734,0 piqa,acc_norm,0.7889009793253536,0.00952137737873415,0 rte,acc,0.5090252707581228,0.030091559826331334,0 sciq,acc,0.938,0.007629823996280308,0 sciq,acc_norm,0.928,0.008178195576218681,0 storycloze_2016,acc,0.7669695350080171,0.009776301898548037,0 winogrande,acc,0.6511444356748224,0.013395059320137327,0