|
task,metric,value,err,version
|
|
anli_r1,acc,0.339,0.014976758771620342,0
|
|
anli_r2,acc,0.336,0.014944140233795027,0
|
|
anli_r3,acc,0.3458333333333333,0.013736245342311012,0
|
|
arc_challenge,acc,0.31143344709897613,0.013532472099850945,0
|
|
arc_challenge,acc_norm,0.3412969283276451,0.013855831287497719,0
|
|
arc_easy,acc,0.6734006734006734,0.00962304703826764,0
|
|
arc_easy,acc_norm,0.5892255892255892,0.010095101349348653,0
|
|
boolq,acc,0.6351681957186545,0.00841944098496366,1
|
|
cb,acc,0.39285714285714285,0.0658538889806635,1
|
|
cb,f1,0.19047619047619047,,1
|
|
copa,acc,0.82,0.038612291966536955,0
|
|
hellaswag,acc,0.5356502688707429,0.004977081808179436,0
|
|
hellaswag,acc_norm,0.7113124875522804,0.004522262128177007,0
|
|
piqa,acc,0.780739934711643,0.00965335746360531,0
|
|
piqa,acc_norm,0.7894450489662677,0.009512378081238743,0
|
|
rte,acc,0.628158844765343,0.029091018492217447,0
|
|
sciq,acc,0.896,0.009658016218524298,0
|
|
sciq,acc_norm,0.823,0.012075463420375061,0
|
|
storycloze_2016,acc,0.7589524318546232,0.009890946490576938,0
|
|
winogrande,acc,0.648776637726914,0.013415981370545126,0
|
|
|