|
task,metric,value,err,version
|
|
anli_r1,acc,0.325,0.014818724459095524,0
|
|
anli_r2,acc,0.337,0.0149550879186536,0
|
|
anli_r3,acc,0.3475,0.013751753243291852,0
|
|
arc_challenge,acc,0.34982935153583616,0.01393680921215828,0
|
|
arc_challenge,acc_norm,0.3626279863481229,0.014049106564955012,0
|
|
arc_easy,acc,0.6893939393939394,0.009495260551195607,0
|
|
arc_easy,acc_norm,0.6721380471380471,0.00963258707617002,0
|
|
boolq,acc,0.6321100917431193,0.008434276591093038,1
|
|
cb,acc,0.4107142857142857,0.06633634150359538,1
|
|
cb,f1,0.3114633159610671,,1
|
|
copa,acc,0.85,0.035887028128263714,0
|
|
hellaswag,acc,0.5340569607647879,0.0049781928934062745,0
|
|
hellaswag,acc_norm,0.7103166699860586,0.00452688302102762,0
|
|
piqa,acc,0.7780195865070729,0.009696120744662026,0
|
|
piqa,acc_norm,0.7878128400435256,0.009539299828174055,0
|
|
rte,acc,0.5415162454873647,0.029992535385373314,0
|
|
sciq,acc,0.934,0.007855297938697589,0
|
|
sciq,acc_norm,0.933,0.007910345983177549,0
|
|
storycloze_2016,acc,0.7659005879208979,0.009791868211495318,0
|
|
winogrande,acc,0.6337805840568271,0.013540144376588896,0
|
|
|