|
task,metric,value,err,version
|
|
anli_r1,acc,0.319,0.014746404865473479,0
|
|
anli_r2,acc,0.318,0.0147340793093119,0
|
|
anli_r3,acc,0.3458333333333333,0.013736245342311012,0
|
|
arc_challenge,acc,0.34215017064846415,0.013864152159177278,0
|
|
arc_challenge,acc_norm,0.35494880546075086,0.013983036904094095,0
|
|
arc_easy,acc,0.6704545454545454,0.009645184190953855,0
|
|
arc_easy,acc_norm,0.6439393939393939,0.009825454608416303,0
|
|
boolq,acc,0.6461773700305811,0.008362983020904465,1
|
|
cb,acc,0.4642857142857143,0.06724777654937658,1
|
|
cb,f1,0.31876138433515483,,1
|
|
copa,acc,0.81,0.03942772444036623,0
|
|
hellaswag,acc,0.533559051981677,0.0049785296421409365,0
|
|
hellaswag,acc_norm,0.7054371639115714,0.004549143750428458,0
|
|
piqa,acc,0.7736670293797606,0.009763294246879425,0
|
|
piqa,acc_norm,0.7823721436343852,0.009627407474840869,0
|
|
rte,acc,0.5523465703971119,0.02993107036293953,0
|
|
sciq,acc,0.928,0.008178195576218681,0
|
|
sciq,acc_norm,0.915,0.008823426366942317,0
|
|
storycloze_2016,acc,0.7455905932656334,0.010071542492663043,0
|
|
winogrande,acc,0.6179952644041041,0.013655578215970422,0
|
|
|