task,metric,value,err,version anli_r1,acc,0.32,0.01475865230357487,0 anli_r2,acc,0.335,0.014933117490932566,0 anli_r3,acc,0.33916666666666667,0.013672343491681812,0 arc_challenge,acc,0.3430034129692833,0.013872423223718173,0 arc_challenge,acc_norm,0.34982935153583616,0.013936809212158284,0 arc_easy,acc,0.672979797979798,0.009626235849372207,0 arc_easy,acc_norm,0.6553030303030303,0.009752321586569784,0 boolq,acc,0.6464831804281346,0.008361346005339394,1 cb,acc,0.375,0.06527912098338669,1 cb,f1,0.28595317725752506,,1 copa,acc,0.89,0.03144660377352203,0 hellaswag,acc,0.5285799641505676,0.004981623292196192,0 hellaswag,acc_norm,0.7057359091814379,0.00454779896412668,0 piqa,acc,0.7774755168661589,0.009704600975718245,0 piqa,acc_norm,0.7861806311207835,0.009565994206915606,0 rte,acc,0.5451263537906137,0.029973636495415255,0 sciq,acc,0.929,0.008125578442487923,0 sciq,acc_norm,0.923,0.008434580140240644,0 storycloze_2016,acc,0.757883484767504,0.009905870033193868,0 winogrande,acc,0.6527229676400947,0.013380909249751242,0