|
task,metric,value,err,version
|
|
anli_r1,acc,0.329,0.014865395385928367,0
|
|
anli_r2,acc,0.312,0.014658474370509005,0
|
|
anli_r3,acc,0.3458333333333333,0.013736245342311012,0
|
|
arc_challenge,acc,0.3361774744027304,0.013804855026205758,0
|
|
arc_challenge,acc_norm,0.35921501706484643,0.014020224155839152,0
|
|
arc_easy,acc,0.6847643097643098,0.009533589368505855,0
|
|
arc_easy,acc_norm,0.6393097643097643,0.00985351210841675,0
|
|
boolq,acc,0.653211009174312,0.008324380793263166,1
|
|
cb,acc,0.5,0.06741998624632421,1
|
|
cb,f1,0.3553459119496855,,1
|
|
copa,acc,0.78,0.04163331998932263,0
|
|
hellaswag,acc,0.533559051981677,0.0049785296421409365,0
|
|
hellaswag,acc_norm,0.7088229436367257,0.0045337646862119935,0
|
|
piqa,acc,0.7752992383025027,0.009738282586548384,0
|
|
piqa,acc_norm,0.7856365614798694,0.00957484213605097,0
|
|
rte,acc,0.5523465703971119,0.02993107036293953,0
|
|
sciq,acc,0.93,0.0080724943583235,0
|
|
sciq,acc_norm,0.925,0.008333333333333345,0
|
|
storycloze_2016,acc,0.7477284874398717,0.010043504206387305,0
|
|
winogrande,acc,0.6479873717442778,0.013422874824929714,0
|
|
|