task,metric,value,err,version anli_r1,acc,0.334,0.01492201952373296,0 anli_r2,acc,0.341,0.014998131348402697,0 anli_r3,acc,0.3625,0.013883037874225516,0 arc_challenge,acc,0.31399317406143346,0.013562691224726291,0 arc_challenge,acc_norm,0.32849829351535836,0.013724978465537364,0 arc_easy,acc,0.6616161616161617,0.009709034670525096,0 arc_easy,acc_norm,0.5875420875420876,0.01010130544786476,0 boolq,acc,0.6333333333333333,0.008428386213506826,1 cb,acc,0.3392857142857143,0.06384226561930824,1 cb,f1,0.2059178743961352,,1 copa,acc,0.87,0.03379976689896309,0 hellaswag,acc,0.5325632344154551,0.004979188195338179,0 hellaswag,acc_norm,0.7046405098585939,0.004552718360513099,0 piqa,acc,0.7704026115342764,0.009812682950815187,0 piqa,acc_norm,0.7850924918389554,0.009583665082653316,0 rte,acc,0.48014440433212996,0.0300727231673172,0 sciq,acc,0.877,0.010391293421849877,0 sciq,acc_norm,0.795,0.01277255409611312,0 storycloze_2016,acc,0.7482629609834314,0.01003644434459808,0 winogrande,acc,0.6187845303867403,0.013650172164160305,0