|
task,metric,value,err,version
|
|
anli_r1,acc,0.324,0.01480686473373886,0
|
|
anli_r2,acc,0.33,0.014876872027456732,0
|
|
anli_r3,acc,0.3258333333333333,0.013535422043417454,0
|
|
arc_challenge,acc,0.3455631399317406,0.013896938461145687,0
|
|
arc_challenge,acc_norm,0.3643344709897611,0.014063260279882413,0
|
|
arc_easy,acc,0.6957070707070707,0.009441202922359185,0
|
|
arc_easy,acc_norm,0.6712962962962963,0.009638903167022168,0
|
|
boolq,acc,0.6629969418960244,0.008267329046329363,1
|
|
cb,acc,0.4107142857142857,0.0663363415035954,1
|
|
cb,f1,0.27708333333333335,,1
|
|
copa,acc,0.82,0.038612291966536955,0
|
|
hellaswag,acc,0.5340569607647879,0.0049781928934062745,0
|
|
hellaswag,acc_norm,0.716391157140012,0.0044982802444945074,0
|
|
piqa,acc,0.7758433079434167,0.00972989795641006,0
|
|
piqa,acc_norm,0.7861806311207835,0.009565994206915607,0
|
|
rte,acc,0.5703971119133574,0.02979666882912467,0
|
|
sciq,acc,0.946,0.007150883521295435,0
|
|
sciq,acc_norm,0.942,0.007395315455792937,0
|
|
storycloze_2016,acc,0.7594869053981828,0.009883453084862687,0
|
|
winogrande,acc,0.6345698500394633,0.013533965097638788,0
|
|
|