|
task,metric,value,err,version
|
|
anli_r1,acc,0.304,0.014553205687950436,0
|
|
anli_r2,acc,0.319,0.01474640486547348,0
|
|
anli_r3,acc,0.335,0.013630871843821465,0
|
|
arc_challenge,acc,0.3412969283276451,0.013855831287497719,0
|
|
arc_challenge,acc_norm,0.36006825938566556,0.014027516814585186,0
|
|
arc_easy,acc,0.6898148148148148,0.009491721291998517,0
|
|
arc_easy,acc_norm,0.6734006734006734,0.00962304703826765,0
|
|
boolq,acc,0.637308868501529,0.008408838061823179,1
|
|
cb,acc,0.35714285714285715,0.06460957383809221,1
|
|
cb,f1,0.20317460317460315,,1
|
|
copa,acc,0.84,0.03684529491774709,0
|
|
hellaswag,acc,0.5318661621190998,0.004979637330230314,0
|
|
hellaswag,acc_norm,0.7107149970125473,0.004525037849178835,0
|
|
piqa,acc,0.7763873775843307,0.009721489519176299,0
|
|
piqa,acc_norm,0.7850924918389554,0.009583665082653308,0
|
|
rte,acc,0.5776173285198556,0.02973162264649588,0
|
|
sciq,acc,0.928,0.008178195576218681,0
|
|
sciq,acc_norm,0.921,0.008534156773333463,0
|
|
storycloze_2016,acc,0.757883484767504,0.009905870033193874,0
|
|
winogrande,acc,0.6187845303867403,0.01365017216416031,0
|
|
|