|
task,metric,value,err,version
|
|
anli_r1,acc,0.334,0.01492201952373296,0
|
|
anli_r2,acc,0.341,0.014998131348402697,0
|
|
anli_r3,acc,0.3625,0.013883037874225516,0
|
|
arc_challenge,acc,0.31399317406143346,0.013562691224726291,0
|
|
arc_challenge,acc_norm,0.32849829351535836,0.013724978465537364,0
|
|
arc_easy,acc,0.6616161616161617,0.009709034670525096,0
|
|
arc_easy,acc_norm,0.5875420875420876,0.01010130544786476,0
|
|
boolq,acc,0.6333333333333333,0.008428386213506826,1
|
|
cb,acc,0.3392857142857143,0.06384226561930824,1
|
|
cb,f1,0.2059178743961352,,1
|
|
copa,acc,0.87,0.03379976689896309,0
|
|
hellaswag,acc,0.5325632344154551,0.004979188195338179,0
|
|
hellaswag,acc_norm,0.7046405098585939,0.004552718360513099,0
|
|
piqa,acc,0.7704026115342764,0.009812682950815187,0
|
|
piqa,acc_norm,0.7850924918389554,0.009583665082653316,0
|
|
rte,acc,0.48014440433212996,0.0300727231673172,0
|
|
sciq,acc,0.877,0.010391293421849877,0
|
|
sciq,acc_norm,0.795,0.01277255409611312,0
|
|
storycloze_2016,acc,0.7482629609834314,0.01003644434459808,0
|
|
winogrande,acc,0.6187845303867403,0.013650172164160305,0
|
|
|