task,metric,value,err,version anli_r1,acc,0.337,0.014955087918653605,0 anli_r2,acc,0.336,0.014944140233795018,0 anli_r3,acc,0.3375,0.013655897185463658,0 arc_challenge,acc,0.3515358361774744,0.01395241369960094,0 arc_challenge,acc_norm,0.3660409556313993,0.014077223108470139,0 arc_easy,acc,0.6999158249158249,0.00940400055851335,0 arc_easy,acc_norm,0.6746632996632996,0.009613427708996187,0 boolq,acc,0.6474006116207951,0.00835641249356212,1 cb,acc,0.125,0.04459412925079224,1 cb,f1,0.10899594232927566,,1 copa,acc,0.85,0.03588702812826373,0 hellaswag,acc,0.5310695080661223,0.004980138679161042,0 hellaswag,acc_norm,0.7102170882294364,0.004527343651130806,0 piqa,acc,0.7698585418933623,0.009820832826839815,0 piqa,acc_norm,0.7780195865070729,0.009696120744662022,0 rte,acc,0.48375451263537905,0.030080573208738064,0 sciq,acc,0.934,0.00785529793869759,0 sciq,acc_norm,0.93,0.008072494358323508,0 storycloze_2016,acc,0.7589524318546232,0.00989094649057693,0 winogrande,acc,0.6227308602999211,0.013622567928799501,0