task,metric,value,err,version anli_r1,acc,0.335,0.014933117490932572,0 anli_r2,acc,0.352,0.015110404505648663,0 anli_r3,acc,0.3233333333333333,0.013508372867300215,0 arc_challenge,acc,0.295221843003413,0.013329750293382316,0 arc_challenge,acc_norm,0.3046075085324232,0.013449522109932487,0 arc_easy,acc,0.6447811447811448,0.009820245899287119,0 arc_easy,acc_norm,0.6195286195286195,0.009962305992058567,0 boolq,acc,0.6143730886850153,0.008513189460768057,1 cb,acc,0.4107142857142857,0.06633634150359541,1 cb,f1,0.3098047785547785,,1 copa,acc,0.78,0.04163331998932262,0 hellaswag,acc,0.47849034056960765,0.00498516207433611,0 hellaswag,acc_norm,0.6403106950806612,0.00478928472395585,0 piqa,acc,0.7562568008705114,0.010017199471500619,0 piqa,acc_norm,0.7622415669205659,0.009932525779525492,0 rte,acc,0.4729241877256318,0.030052303463143706,0 sciq,acc,0.918,0.008680515615523705,0 sciq,acc_norm,0.902,0.009406619184621224,0 storycloze_2016,acc,0.7279529663281668,0.01029088806087124,0 winogrande,acc,0.595895816890292,0.01379161066467086,0