task,metric,value,err,version anli_r1,acc,0.349,0.015080663991563098,0 anli_r2,acc,0.345,0.015039986742055242,0 anli_r3,acc,0.3466666666666667,0.013744022550571956,0 arc_challenge,acc,0.3395904436860068,0.01383903976282016,0 arc_challenge,acc_norm,0.3660409556313993,0.014077223108470142,0 arc_easy,acc,0.6839225589225589,0.009540440071928283,0 arc_easy,acc_norm,0.6683501683501684,0.009660733780923948,0 boolq,acc,0.6590214067278287,0.00829097981816109,1 cb,acc,0.48214285714285715,0.0673769750864465,1 cb,f1,0.43206548866926225,,1 copa,acc,0.84,0.03684529491774709,0 hellaswag,acc,0.5317665803624776,0.004979700695747948,0 hellaswag,acc_norm,0.7076279625572595,0.004539227260397019,0 piqa,acc,0.7905331882480957,0.009494302979819794,0 piqa,acc_norm,0.7927094668117519,0.009457844699952372,0 rte,acc,0.51985559566787,0.030072723167317177,0 sciq,acc,0.935,0.007799733061832017,0 sciq,acc_norm,0.929,0.008125578442487916,0 storycloze_2016,acc,0.7573490112239444,0.009913300265342056,0 winogrande,acc,0.6432517758484609,0.013463393958028726,0