{
  "results": {
    "anli_r1": {
      "acc": 0.335,
      "acc_stderr": 0.014933117490932572
    },
    "anli_r2": {
      "acc": 0.352,
      "acc_stderr": 0.015110404505648663
    },
    "anli_r3": {
      "acc": 0.3233333333333333,
      "acc_stderr": 0.013508372867300215
    },
    "cb": {
      "acc": 0.4107142857142857,
      "acc_stderr": 0.06633634150359541,
      "f1": 0.3098047785547785
    },
    "copa": {
      "acc": 0.78,
      "acc_stderr": 0.04163331998932262
    },
    "hellaswag": {
      "acc": 0.47849034056960765,
      "acc_stderr": 0.00498516207433611,
      "acc_norm": 0.6403106950806612,
      "acc_norm_stderr": 0.00478928472395585
    },
    "rte": {
      "acc": 0.4729241877256318,
      "acc_stderr": 0.030052303463143706
    },
    "winogrande": {
      "acc": 0.595895816890292,
      "acc_stderr": 0.01379161066467086
    },
    "storycloze_2016": {
      "acc": 0.7279529663281668,
      "acc_stderr": 0.01029088806087124
    },
    "boolq": {
      "acc": 0.6143730886850153,
      "acc_stderr": 0.008513189460768057
    },
    "arc_easy": {
      "acc": 0.6447811447811448,
      "acc_stderr": 0.009820245899287119,
      "acc_norm": 0.6195286195286195,
      "acc_norm_stderr": 0.009962305992058567
    },
    "arc_challenge": {
      "acc": 0.295221843003413,
      "acc_stderr": 0.013329750293382316,
      "acc_norm": 0.3046075085324232,
      "acc_norm_stderr": 0.013449522109932487
    },
    "sciq": {
      "acc": 0.918,
      "acc_stderr": 0.008680515615523705,
      "acc_norm": 0.902,
      "acc_norm_stderr": 0.009406619184621224
    },
    "piqa": {
      "acc": 0.7562568008705114,
      "acc_stderr": 0.010017199471500619,
      "acc_norm": 0.7622415669205659,
      "acc_norm_stderr": 0.009932525779525492
    }
  },
  "versions": {
    "anli_r1": 0,
    "anli_r2": 0,
    "anli_r3": 0,
    "cb": 1,
    "copa": 0,
    "hellaswag": 0,
    "rte": 0,
    "winogrande": 0,
    "storycloze_2016": 0,
    "boolq": 1,
    "arc_easy": 0,
    "arc_challenge": 0,
    "sciq": 0,
    "piqa": 0
  }
}