task,metric,value,err,version anli_r1,acc,0.33,0.014876872027456734,0 anli_r2,acc,0.314,0.014683991951087967,0 anli_r3,acc,0.3675,0.013923529685359282,0 arc_challenge,acc,0.3319112627986348,0.013760988200880536,0 arc_challenge,acc_norm,0.3515358361774744,0.013952413699600943,0 arc_easy,acc,0.6767676767676768,0.009597218642045324,0 arc_easy,acc_norm,0.6439393939393939,0.009825454608416304,0 boolq,acc,0.6406727828746177,0.00839181177040674,1 cb,acc,0.48214285714285715,0.0673769750864465,1 cb,f1,0.36000000000000004,,1 copa,acc,0.76,0.04292346959909283,0 hellaswag,acc,0.529874526986656,0.004980866814462756,0 hellaswag,acc_norm,0.7042421828321052,0.004554499409290722,0 piqa,acc,0.7823721436343852,0.009627407474840878,0 piqa,acc_norm,0.7861806311207835,0.009565994206915606,0 rte,acc,0.5379061371841155,0.030009848912529113,0 sciq,acc,0.919,0.008632121032139985,0 sciq,acc_norm,0.915,0.00882342636694232,0 storycloze_2016,acc,0.7514698022447889,0.009993659448666372,0 winogrande,acc,0.6385161799526441,0.013502479670791285,0