|
task,metric,value,err,version
|
|
anli_r1,acc,0.335,0.014933117490932572,0
|
|
anli_r2,acc,0.344,0.015029633724408948,0
|
|
anli_r3,acc,0.36916666666666664,0.01393666834928527,0
|
|
arc_challenge,acc,0.26535836177474403,0.012902554762313967,0
|
|
arc_challenge,acc_norm,0.3037542662116041,0.01343890918477876,0
|
|
arc_easy,acc,0.5378787878787878,0.010230299628864799,0
|
|
arc_easy,acc_norm,0.5206228956228957,0.010251052755716122,0
|
|
boolq,acc,0.5165137614678899,0.008740284046486645,1
|
|
cb,acc,0.4642857142857143,0.06724777654937658,1
|
|
cb,f1,0.3255198942180551,,1
|
|
copa,acc,0.69,0.04648231987117316,0
|
|
hellaswag,acc,0.45518820952001593,0.004969701081068371,0
|
|
hellaswag,acc_norm,0.5893248356901015,0.004909509538525173,0
|
|
piqa,acc,0.719804134929271,0.01047812201557708,0
|
|
piqa,acc_norm,0.7274211099020674,0.010389256803296018,0
|
|
rte,acc,0.4729241877256318,0.030052303463143706,0
|
|
sciq,acc,0.806,0.012510816141264362,0
|
|
sciq,acc_norm,0.777,0.013169830843425672,0
|
|
storycloze_2016,acc,0.6964190272581507,0.010632901358518371,0
|
|
winogrande,acc,0.5438042620363063,0.01399845361092433,0
|
|
|