{ "results": { "anli_r1": { "acc": 0.316, "acc_stderr": 0.014709193056057107 }, "anli_r2": { "acc": 0.332, "acc_stderr": 0.014899597242811478 }, "anli_r3": { "acc": 0.34, "acc_stderr": 0.013680495725767803 }, "cb": { "acc": 0.30357142857142855, "acc_stderr": 0.06199938655510754, "f1": 0.2236842105263158 }, "copa": { "acc": 0.84, "acc_stderr": 0.03684529491774709 }, "hellaswag": { "acc": 0.5338577972515435, "acc_stderr": 0.004978328190775526, "acc_norm": 0.7099183429595698, "acc_norm_stderr": 0.004528723951878254 }, "rte": { "acc": 0.5415162454873647, "acc_stderr": 0.029992535385373314 }, "winogrande": { "acc": 0.6369376479873717, "acc_stderr": 0.013515191866479221 }, "storycloze_2016": { "acc": 0.7600213789417424, "acc_stderr": 0.009875938525582594 }, "boolq": { "acc": 0.6516819571865443, "acc_stderr": 0.008332942286688303 }, "arc_easy": { "acc": 0.6792929292929293, "acc_stderr": 0.00957747457110883, "acc_norm": 0.6670875420875421, "acc_norm_stderr": 0.009669958978395326 }, "arc_challenge": { "acc": 0.34897610921501704, "acc_stderr": 0.013928933461382497, "acc_norm": 0.36860068259385664, "acc_norm_stderr": 0.014097810678042184 }, "sciq": { "acc": 0.934, "acc_stderr": 0.007855297938697587, "acc_norm": 0.925, "acc_norm_stderr": 0.008333333333333326 }, "piqa": { "acc": 0.7687704026115343, "acc_stderr": 0.00983706318062533, "acc_norm": 0.7829162132752993, "acc_norm_stderr": 0.009618708415756785 } }, "versions": { "anli_r1": 0, "anli_r2": 0, "anli_r3": 0, "cb": 1, "copa": 0, "hellaswag": 0, "rte": 0, "winogrande": 0, "storycloze_2016": 0, "boolq": 1, "arc_easy": 0, "arc_challenge": 0, "sciq": 0, "piqa": 0 } }