|
task,metric,value,err,version
|
|
anli_r1,acc,0.337,0.014955087918653605,0
|
|
anli_r2,acc,0.336,0.014944140233795018,0
|
|
anli_r3,acc,0.3375,0.013655897185463658,0
|
|
arc_challenge,acc,0.3515358361774744,0.01395241369960094,0
|
|
arc_challenge,acc_norm,0.3660409556313993,0.014077223108470139,0
|
|
arc_easy,acc,0.6999158249158249,0.00940400055851335,0
|
|
arc_easy,acc_norm,0.6746632996632996,0.009613427708996187,0
|
|
boolq,acc,0.6474006116207951,0.00835641249356212,1
|
|
cb,acc,0.125,0.04459412925079224,1
|
|
cb,f1,0.10899594232927566,,1
|
|
copa,acc,0.85,0.03588702812826373,0
|
|
hellaswag,acc,0.5310695080661223,0.004980138679161042,0
|
|
hellaswag,acc_norm,0.7102170882294364,0.004527343651130806,0
|
|
piqa,acc,0.7698585418933623,0.009820832826839815,0
|
|
piqa,acc_norm,0.7780195865070729,0.009696120744662022,0
|
|
rte,acc,0.48375451263537905,0.030080573208738064,0
|
|
sciq,acc,0.934,0.00785529793869759,0
|
|
sciq,acc_norm,0.93,0.008072494358323508,0
|
|
storycloze_2016,acc,0.7589524318546232,0.00989094649057693,0
|
|
winogrande,acc,0.6227308602999211,0.013622567928799501,0
|
|
|