task,metric,value,err,version anli_r1,acc,0.334,0.014922019523732958,0 anli_r2,acc,0.326,0.014830507204541033,0 anli_r3,acc,0.315,0.013415009084004871,0 arc_challenge,acc,0.3438566552901024,0.013880644570156205,0 arc_challenge,acc_norm,0.3609215017064846,0.01403476138617546,0 arc_easy,acc,0.6898148148148148,0.009491721291998517,0 arc_easy,acc_norm,0.6679292929292929,0.009663817543072694,0 boolq,acc,0.6611620795107034,0.008278325755273739,1 cb,acc,0.4107142857142857,0.06633634150359541,1 cb,f1,0.28315412186379935,,1 copa,acc,0.85,0.03588702812826373,0 hellaswag,acc,0.5351523600876319,0.004977434505403355,0 hellaswag,acc_norm,0.711611232822147,0.004520870679457054,0 piqa,acc,0.779651795429815,0.00967053545685313,0 piqa,acc_norm,0.7889009793253536,0.009521377378734144,0 rte,acc,0.5523465703971119,0.02993107036293953,0 sciq,acc,0.944,0.007274401481697059,0 sciq,acc_norm,0.931,0.008018934050315157,0 storycloze_2016,acc,0.7584179583110636,0.009898418790766705,0 winogrande,acc,0.6361483820047356,0.013521488896883413,0