|
task,metric,value,err,version
|
|
anli_r1,acc,0.334,0.014922019523732958,0
|
|
anli_r2,acc,0.326,0.014830507204541033,0
|
|
anli_r3,acc,0.315,0.013415009084004871,0
|
|
arc_challenge,acc,0.3438566552901024,0.013880644570156205,0
|
|
arc_challenge,acc_norm,0.3609215017064846,0.01403476138617546,0
|
|
arc_easy,acc,0.6898148148148148,0.009491721291998517,0
|
|
arc_easy,acc_norm,0.6679292929292929,0.009663817543072694,0
|
|
boolq,acc,0.6611620795107034,0.008278325755273739,1
|
|
cb,acc,0.4107142857142857,0.06633634150359541,1
|
|
cb,f1,0.28315412186379935,,1
|
|
copa,acc,0.85,0.03588702812826373,0
|
|
hellaswag,acc,0.5351523600876319,0.004977434505403355,0
|
|
hellaswag,acc_norm,0.711611232822147,0.004520870679457054,0
|
|
piqa,acc,0.779651795429815,0.00967053545685313,0
|
|
piqa,acc_norm,0.7889009793253536,0.009521377378734144,0
|
|
rte,acc,0.5523465703971119,0.02993107036293953,0
|
|
sciq,acc,0.944,0.007274401481697059,0
|
|
sciq,acc_norm,0.931,0.008018934050315157,0
|
|
storycloze_2016,acc,0.7584179583110636,0.009898418790766705,0
|
|
winogrande,acc,0.6361483820047356,0.013521488896883413,0
|
|
|