task,metric,value,err,version anli_r1,acc,0.336,0.01494414023379502,0 anli_r2,acc,0.315,0.014696631960792506,0 anli_r3,acc,0.34,0.0136804957257678,0 arc_challenge,acc,0.29266211604095566,0.01329591610361942,0 arc_challenge,acc_norm,0.32849829351535836,0.013724978465537357,0 arc_easy,acc,0.6220538720538721,0.009949405744045452,0 arc_easy,acc_norm,0.5787037037037037,0.010131882498193127,0 boolq,acc,0.5669724770642202,0.008666251305518059,1 cb,acc,0.5357142857142857,0.06724777654937658,1 cb,f1,0.38181818181818183,,1 copa,acc,0.74,0.04408440022768077,0 hellaswag,acc,0.48137821151165106,0.004986319587524962,0 hellaswag,acc_norm,0.6344353714399522,0.004806039039008954,0 piqa,acc,0.7551686615886833,0.010032309105568788,0 piqa,acc_norm,0.764961915125136,0.009893146688805308,0 rte,acc,0.5451263537906137,0.029973636495415252,0 sciq,acc,0.891,0.00985982840703719,0 sciq,acc_norm,0.871,0.010605256784796579,0 storycloze_2016,acc,0.7044361304115446,0.01055177883937378,0 winogrande,acc,0.5974743488555643,0.013782866831703048,0