task,metric,value,err,version anli_r1,acc,0.342,0.01500870618212173,0 anli_r2,acc,0.323,0.014794927843348642,0 anli_r3,acc,0.33166666666666667,0.013596836729485163,0 arc_challenge,acc,0.3583617747440273,0.014012883334859864,0 arc_challenge,acc_norm,0.3677474402730375,0.01409099561816847,0 arc_easy,acc,0.6919191919191919,0.009473887075826332,0 arc_easy,acc_norm,0.6830808080808081,0.009547254611446373,0 boolq,acc,0.6235474006116208,0.008473882279194586,1 cb,acc,0.375,0.06527912098338669,1 cb,f1,0.23907547851209823,,1 copa,acc,0.86,0.03487350880197772,0 hellaswag,acc,0.5319657438757219,0.0049795737655758615,0 hellaswag,acc_norm,0.7148974307906791,0.00450540617660685,0 piqa,acc,0.7812840043525572,0.009644731932667556,0 piqa,acc_norm,0.7840043525571273,0.009601236303553544,0 rte,acc,0.5992779783393501,0.029497229237163143,0 sciq,acc,0.938,0.007629823996280307,0 sciq,acc_norm,0.93,0.008072494358323499,0 storycloze_2016,acc,0.7648316408337787,0.009807347513356905,0 winogrande,acc,0.6290449881610103,0.013576399902231568,0