task,metric,value,err,version anli_r1,acc,0.306,0.01458000605543697,0 anli_r2,acc,0.339,0.014976758771620347,0 anli_r3,acc,0.33166666666666667,0.013596836729485166,0 arc_challenge,acc,0.3250853242320819,0.013688147309729119,0 arc_challenge,acc_norm,0.35580204778157,0.013990571137918758,0 arc_easy,acc,0.6755050505050505,0.009606970654515781,0 arc_easy,acc_norm,0.6561447811447811,0.009746660584852442,0 boolq,acc,0.6608562691131499,0.008280145027624473,1 cb,acc,0.19642857142857142,0.05357142857142858,1 cb,f1,0.20578463681911954,,1 copa,acc,0.82,0.038612291966536955,0 hellaswag,acc,0.5296753634734117,0.004980985384152899,0 hellaswag,acc_norm,0.7057359091814379,0.004547798964126677,0 piqa,acc,0.7758433079434167,0.00972989795641005,0 piqa,acc_norm,0.7883569096844396,0.009530351270479393,0 rte,acc,0.5667870036101083,0.029826764082138274,0 sciq,acc,0.944,0.007274401481697058,0 sciq,acc_norm,0.935,0.007799733061832014,0 storycloze_2016,acc,0.7621592731159808,0.009845667782049759,0 winogrande,acc,0.6408839779005525,0.013483115202120234,0