task,metric,value,err,version anli_r1,acc,0.338,0.014965960710224489,0 anli_r2,acc,0.352,0.015110404505648673,0 anli_r3,acc,0.34,0.013680495725767804,0 arc_challenge,acc,0.3515358361774744,0.013952413699600942,0 arc_challenge,acc_norm,0.36006825938566556,0.014027516814585183,0 arc_easy,acc,0.6898148148148148,0.009491721291998515,0 arc_easy,acc_norm,0.6851851851851852,0.009530150430975602,0 boolq,acc,0.6639143730886851,0.008261778456573672,1 cb,acc,0.39285714285714285,0.0658538889806635,1 cb,f1,0.28048780487804875,,1 copa,acc,0.82,0.03861229196653694,0 hellaswag,acc,0.5388368850826528,0.004974706428434288,0 hellaswag,acc_norm,0.7166899024098785,0.004496847773250643,0 piqa,acc,0.7720348204570185,0.009788093832324908,0 piqa,acc_norm,0.7899891186071817,0.009503353305818581,0 rte,acc,0.5415162454873647,0.029992535385373314,0 sciq,acc,0.937,0.007687007876286423,0 sciq,acc_norm,0.939,0.007572076091557425,0 storycloze_2016,acc,0.774986638161411,0.009656738215290533,0 winogrande,acc,0.6416732438831886,0.013476581172567528,0