|
task,metric,value,err,version
|
|
anli_r1,acc,0.332,0.014899597242811478,0
|
|
anli_r2,acc,0.315,0.014696631960792492,0
|
|
anli_r3,acc,0.3233333333333333,0.013508372867300212,0
|
|
arc_challenge,acc,0.35921501706484643,0.014020224155839141,0
|
|
arc_challenge,acc_norm,0.3651877133105802,0.014070265519268804,0
|
|
arc_easy,acc,0.6902356902356902,0.00948817285190372,0
|
|
arc_easy,acc_norm,0.6734006734006734,0.009623047038267657,0
|
|
boolq,acc,0.6688073394495413,0.008231583858517822,1
|
|
cb,acc,0.5,0.06741998624632421,1
|
|
cb,f1,0.36179337231968806,,1
|
|
copa,acc,0.85,0.035887028128263734,0
|
|
hellaswag,acc,0.5313682533359888,0.004979952166595539,0
|
|
hellaswag,acc_norm,0.7123083051185023,0.004517614647703246,0
|
|
piqa,acc,0.7763873775843307,0.009721489519176297,0
|
|
piqa,acc_norm,0.7910772578890098,0.009485227030105093,0
|
|
rte,acc,0.5595667870036101,0.029882123363118712,0
|
|
sciq,acc,0.937,0.007687007876286423,0
|
|
sciq,acc_norm,0.931,0.008018934050315158,0
|
|
storycloze_2016,acc,0.7530732228754676,0.00997199136038898,0
|
|
winogrande,acc,0.6495659037095501,0.013409047676670192,0
|
|
|