task,metric,value,err,version anli_r1,acc,0.332,0.014899597242811478,0 anli_r2,acc,0.315,0.014696631960792492,0 anli_r3,acc,0.3233333333333333,0.013508372867300212,0 arc_challenge,acc,0.35921501706484643,0.014020224155839141,0 arc_challenge,acc_norm,0.3651877133105802,0.014070265519268804,0 arc_easy,acc,0.6902356902356902,0.00948817285190372,0 arc_easy,acc_norm,0.6734006734006734,0.009623047038267657,0 boolq,acc,0.6688073394495413,0.008231583858517822,1 cb,acc,0.5,0.06741998624632421,1 cb,f1,0.36179337231968806,,1 copa,acc,0.85,0.035887028128263734,0 hellaswag,acc,0.5313682533359888,0.004979952166595539,0 hellaswag,acc_norm,0.7123083051185023,0.004517614647703246,0 piqa,acc,0.7763873775843307,0.009721489519176297,0 piqa,acc_norm,0.7910772578890098,0.009485227030105093,0 rte,acc,0.5595667870036101,0.029882123363118712,0 sciq,acc,0.937,0.007687007876286423,0 sciq,acc_norm,0.931,0.008018934050315158,0 storycloze_2016,acc,0.7530732228754676,0.00997199136038898,0 winogrande,acc,0.6495659037095501,0.013409047676670192,0