lm1-4b2-84b-c4-repetitions
/
4b284b12bc4
/evaluation
/4b284b12bc4_1_lm-eval_global_step80108_2023-01-30-11-26-32_1shots_backup.json
{ | |
"results": { | |
"anli_r1": { | |
"acc": 0.333, | |
"acc_stderr": 0.014910846164229868 | |
}, | |
"anli_r2": { | |
"acc": 0.326, | |
"acc_stderr": 0.01483050720454104 | |
}, | |
"anli_r3": { | |
"acc": 0.3475, | |
"acc_stderr": 0.013751753243291852 | |
}, | |
"cb": { | |
"acc": 0.5357142857142857, | |
"acc_stderr": 0.06724777654937658, | |
"f1": 0.37227304714989445 | |
}, | |
"copa": { | |
"acc": 0.79, | |
"acc_stderr": 0.040936018074033256 | |
}, | |
"hellaswag": { | |
"acc": 0.47191794463254333, | |
"acc_stderr": 0.004981905293878145, | |
"acc_norm": 0.6139215295757817, | |
"acc_norm_stderr": 0.004858539527872466 | |
}, | |
"rte": { | |
"acc": 0.5703971119133574, | |
"acc_stderr": 0.029796668829124674 | |
}, | |
"winogrande": { | |
"acc": 0.5706393054459353, | |
"acc_stderr": 0.013911537499969163 | |
}, | |
"storycloze_2016": { | |
"acc": 0.7151256012827365, | |
"acc_stderr": 0.01043751398661172 | |
}, | |
"boolq": { | |
"acc": 0.5669724770642202, | |
"acc_stderr": 0.00866625130551806 | |
}, | |
"arc_easy": { | |
"acc": 0.5913299663299664, | |
"acc_stderr": 0.010087174498762883, | |
"acc_norm": 0.5496632996632996, | |
"acc_norm_stderr": 0.010209047724374145 | |
}, | |
"arc_challenge": { | |
"acc": 0.2627986348122867, | |
"acc_stderr": 0.012862523175351333, | |
"acc_norm": 0.30716723549488056, | |
"acc_norm_stderr": 0.013481034054980943 | |
}, | |
"sciq": { | |
"acc": 0.836, | |
"acc_stderr": 0.011715000693181331, | |
"acc_norm": 0.781, | |
"acc_norm_stderr": 0.013084731950262012 | |
}, | |
"piqa": { | |
"acc": 0.7448313384113167, | |
"acc_stderr": 0.010171571592521822, | |
"acc_norm": 0.7535364526659413, | |
"acc_norm_stderr": 0.01005481078967181 | |
} | |
}, | |
"versions": { | |
"anli_r1": 0, | |
"anli_r2": 0, | |
"anli_r3": 0, | |
"cb": 1, | |
"copa": 0, | |
"hellaswag": 0, | |
"rte": 0, | |
"winogrande": 0, | |
"storycloze_2016": 0, | |
"boolq": 1, | |
"arc_easy": 0, | |
"arc_challenge": 0, | |
"sciq": 0, | |
"piqa": 0 | |
} | |
} |