task,metric,value,err,version anli_r1,acc,0.319,0.014746404865473479,0 anli_r2,acc,0.318,0.0147340793093119,0 anli_r3,acc,0.3458333333333333,0.013736245342311012,0 arc_challenge,acc,0.34215017064846415,0.013864152159177278,0 arc_challenge,acc_norm,0.35494880546075086,0.013983036904094095,0 arc_easy,acc,0.6704545454545454,0.009645184190953855,0 arc_easy,acc_norm,0.6439393939393939,0.009825454608416303,0 boolq,acc,0.6461773700305811,0.008362983020904465,1 cb,acc,0.4642857142857143,0.06724777654937658,1 cb,f1,0.31876138433515483,,1 copa,acc,0.81,0.03942772444036623,0 hellaswag,acc,0.533559051981677,0.0049785296421409365,0 hellaswag,acc_norm,0.7054371639115714,0.004549143750428458,0 piqa,acc,0.7736670293797606,0.009763294246879425,0 piqa,acc_norm,0.7823721436343852,0.009627407474840869,0 rte,acc,0.5523465703971119,0.02993107036293953,0 sciq,acc,0.928,0.008178195576218681,0 sciq,acc_norm,0.915,0.008823426366942317,0 storycloze_2016,acc,0.7455905932656334,0.010071542492663043,0 winogrande,acc,0.6179952644041041,0.013655578215970422,0