|
task,metric,value,err,version
|
|
anli_r1,acc,0.342,0.01500870618212173,0
|
|
anli_r2,acc,0.323,0.014794927843348642,0
|
|
anli_r3,acc,0.33166666666666667,0.013596836729485163,0
|
|
arc_challenge,acc,0.3583617747440273,0.014012883334859864,0
|
|
arc_challenge,acc_norm,0.3677474402730375,0.01409099561816847,0
|
|
arc_easy,acc,0.6919191919191919,0.009473887075826332,0
|
|
arc_easy,acc_norm,0.6830808080808081,0.009547254611446373,0
|
|
boolq,acc,0.6235474006116208,0.008473882279194586,1
|
|
cb,acc,0.375,0.06527912098338669,1
|
|
cb,f1,0.23907547851209823,,1
|
|
copa,acc,0.86,0.03487350880197772,0
|
|
hellaswag,acc,0.5319657438757219,0.0049795737655758615,0
|
|
hellaswag,acc_norm,0.7148974307906791,0.00450540617660685,0
|
|
piqa,acc,0.7812840043525572,0.009644731932667556,0
|
|
piqa,acc_norm,0.7840043525571273,0.009601236303553544,0
|
|
rte,acc,0.5992779783393501,0.029497229237163143,0
|
|
sciq,acc,0.938,0.007629823996280307,0
|
|
sciq,acc_norm,0.93,0.008072494358323499,0
|
|
storycloze_2016,acc,0.7648316408337787,0.009807347513356905,0
|
|
winogrande,acc,0.6290449881610103,0.013576399902231568,0
|
|
|