|
task,metric,value,err,version
|
|
anli_r1,acc,0.314,0.014683991951087962,0
|
|
anli_r2,acc,0.342,0.015008706182121734,0
|
|
anli_r3,acc,0.32416666666666666,0.013517438120881636,0
|
|
arc_challenge,acc,0.29180887372013653,0.013284525292403503,0
|
|
arc_challenge,acc_norm,0.3046075085324232,0.01344952210993249,0
|
|
arc_easy,acc,0.6342592592592593,0.00988298806941883,0
|
|
arc_easy,acc_norm,0.6212121212121212,0.00995373765654204,0
|
|
boolq,acc,0.599388379204893,0.008570545612096372,1
|
|
cb,acc,0.4107142857142857,0.0663363415035954,1
|
|
cb,f1,0.37437732746529967,,1
|
|
copa,acc,0.79,0.040936018074033256,0
|
|
hellaswag,acc,0.4823740290778729,0.004986680048438317,0
|
|
hellaswag,acc_norm,0.6320454092810197,0.004812633280078256,0
|
|
piqa,acc,0.7600652883569097,0.009963625892809544,0
|
|
piqa,acc_norm,0.7633297062023939,0.009916841655042809,0
|
|
rte,acc,0.5306859205776173,0.030039730592197812,0
|
|
sciq,acc,0.917,0.00872852720607479,0
|
|
sciq,acc_norm,0.902,0.009406619184621236,0
|
|
storycloze_2016,acc,0.7215392838054516,0.010365521460604417,0
|
|
winogrande,acc,0.5887924230465666,0.013829128358676878,0
|
|
|