|
task,metric,value,err,version
|
|
anli_r1,acc,0.334,0.014922019523732954,0
|
|
anli_r2,acc,0.314,0.014683991951087974,0
|
|
anli_r3,acc,0.3416666666666667,0.013696658778002514,0
|
|
arc_challenge,acc,0.3464163822525597,0.013905011180063253,0
|
|
arc_challenge,acc_norm,0.36006825938566556,0.014027516814585186,0
|
|
arc_easy,acc,0.6877104377104377,0.009509325983631455,0
|
|
arc_easy,acc_norm,0.6683501683501684,0.009660733780923952,0
|
|
boolq,acc,0.6681957186544343,0.008235412870849404,1
|
|
cb,acc,0.4107142857142857,0.06633634150359541,1
|
|
cb,f1,0.2859744990892532,,1
|
|
copa,acc,0.84,0.0368452949177471,0
|
|
hellaswag,acc,0.5349531965743876,0.004977574188421318,0
|
|
hellaswag,acc_norm,0.7157936666002789,0.004501137895230712,0
|
|
piqa,acc,0.7834602829162133,0.009609984714384612,0
|
|
piqa,acc_norm,0.7867247007616975,0.00955712122586134,0
|
|
rte,acc,0.5740072202166066,0.029764956741777652,0
|
|
sciq,acc,0.942,0.007395315455792948,0
|
|
sciq,acc_norm,0.935,0.007799733061832016,0
|
|
storycloze_2016,acc,0.760555852485302,0.009868402764412846,0
|
|
winogrande,acc,0.6385161799526441,0.013502479670791292,0
|
|
|