task,metric,value,err,version anli_r1,acc,0.328,0.014853842487270334,0 anli_r2,acc,0.321,0.01477082181793464,0 anli_r3,acc,0.33416666666666667,0.013622434813136783,0 arc_challenge,acc,0.3455631399317406,0.01389693846114569,0 arc_challenge,acc_norm,0.3583617747440273,0.014012883334859859,0 arc_easy,acc,0.6914983164983165,0.009477472342978122,0 arc_easy,acc_norm,0.6734006734006734,0.009623047038267656,0 boolq,acc,0.6584097859327217,0.008294560677768487,1 cb,acc,0.42857142857142855,0.06672848092813058,1 cb,f1,0.30424242424242426,,1 copa,acc,0.82,0.038612291966536955,0 hellaswag,acc,0.528779127663812,0.004981509099276353,0 hellaswag,acc_norm,0.7054371639115714,0.004549143750428458,0 piqa,acc,0.7763873775843307,0.009721489519176289,0 piqa,acc_norm,0.7872687704026116,0.009548223123047346,0 rte,acc,0.5992779783393501,0.029497229237163147,0 sciq,acc,0.931,0.008018934050315157,0 sciq,acc_norm,0.923,0.008434580140240634,0 storycloze_2016,acc,0.7594869053981828,0.009883453084862687,0 winogrande,acc,0.6464088397790055,0.013436541262599955,0