task,metric,value,err,version anli_r1,acc,0.325,0.014818724459095524,0 anli_r2,acc,0.337,0.0149550879186536,0 anli_r3,acc,0.3475,0.013751753243291852,0 arc_challenge,acc,0.34982935153583616,0.01393680921215828,0 arc_challenge,acc_norm,0.3626279863481229,0.014049106564955012,0 arc_easy,acc,0.6893939393939394,0.009495260551195607,0 arc_easy,acc_norm,0.6721380471380471,0.00963258707617002,0 boolq,acc,0.6321100917431193,0.008434276591093038,1 cb,acc,0.4107142857142857,0.06633634150359538,1 cb,f1,0.3114633159610671,,1 copa,acc,0.85,0.035887028128263714,0 hellaswag,acc,0.5340569607647879,0.0049781928934062745,0 hellaswag,acc_norm,0.7103166699860586,0.00452688302102762,0 piqa,acc,0.7780195865070729,0.009696120744662026,0 piqa,acc_norm,0.7878128400435256,0.009539299828174055,0 rte,acc,0.5415162454873647,0.029992535385373314,0 sciq,acc,0.934,0.007855297938697589,0 sciq,acc_norm,0.933,0.007910345983177549,0 storycloze_2016,acc,0.7659005879208979,0.009791868211495318,0 winogrande,acc,0.6337805840568271,0.013540144376588896,0