task,metric,value,err,version anli_r1,acc,0.335,0.014933117490932575,0 anli_r2,acc,0.328,0.014853842487270334,0 anli_r3,acc,0.3433333333333333,0.01371263383046586,0 arc_challenge,acc,0.3378839590443686,0.013822047922283516,0 arc_challenge,acc_norm,0.3643344709897611,0.014063260279882412,0 arc_easy,acc,0.6957070707070707,0.009441202922359183,0 arc_easy,acc_norm,0.6717171717171717,0.00963574950926216,0 boolq,acc,0.6440366972477064,0.008374337517726581,1 cb,acc,0.14285714285714285,0.047184161362558305,1 cb,f1,0.13156966490299823,,1 copa,acc,0.84,0.03684529491774709,0 hellaswag,acc,0.5320653256323441,0.00497951000177662,0 hellaswag,acc_norm,0.7050388368850826,0.004550933142528758,0 piqa,acc,0.7736670293797606,0.009763294246879427,0 piqa,acc_norm,0.7845484221980413,0.009592463115658107,0 rte,acc,0.49097472924187724,0.030091559826331334,0 sciq,acc,0.931,0.008018934050315155,0 sciq,acc_norm,0.922,0.008484573530118587,0 storycloze_2016,acc,0.7536076964190273,0.009964727533753546,0 winogrande,acc,0.6148382004735596,0.013676821287521413,0