|
Step=512 |
|
Train={'loss': 0.2522342102420225, 'acc': 0.9353003393916879} |
|
Hellaswag/choice={'accuracy': 0.2432782314280024} |
|
Hellaswag/no_choice={'accuracy': 0.39533957379008167} |
|
obqa/main={'accuracy': 0.352} |
|
winogrande/no_choice={'accuracy': 0.5288082083662194} |
|
arc/ARC-Challenge={'accuracy': 0.28762541806020064} |
|
arc/ARC-Easy={'accuracy': 0.23684210526315788} |
|
super_glue/boolq={'accuracy': 0.6207951070336392} |
|
piqa/no_choice={'accuracy': 0.6702937976060935} |
|
GLUE/cola={'matthews_correlation': 0.07366254903146852} |
|
GLUE/mnli_matched={'accuracy': 0.50412633723892} |
|
GLUE/mnli_mismatched={'accuracy': 0.5191212367778681} |
|
GLUE/mrpc={'accuracy': 0.5955882352941176, 'f1': 0.6224256292906178} |
|
GLUE/qnli={'accuracy': 0.638843126487278} |
|
GLUE/qqp={'accuracy': 0.6608953747217413, 'f1': 0.1716012084592145} |
|
GLUE/rte={'accuracy': 0.5451263537906137} |
|
GLUE/sst2={'accuracy': 0.7981651376146789} |
|
GLUE/stsb={'pearson': 0.40257920134653435, 'spearmanr': 0.4067145869932732} |
|
GLUE/wnli={'accuracy': 0.43661971830985913} |
|
race/middle={'accuracy': 0.28690807799442897} |
|
race/high={'accuracy': 0.27325412923790204} |
|
|
|
Step=1024 |
|
Train={'loss': 0.2315280896382319, 'acc': 0.93988147865457} |
|
Hellaswag/choice={'accuracy': 0.25652260505875324} |
|
Hellaswag/no_choice={'accuracy': 0.3986257717586138} |
|
obqa/main={'accuracy': 0.304} |
|
winogrande/no_choice={'accuracy': 0.5477505919494869} |
|
arc/ARC-Challenge={'accuracy': 0.27424749163879597} |
|
arc/ARC-Easy={'accuracy': 0.24912280701754386} |
|
super_glue/boolq={'accuracy': 0.6993883792048929} |
|
piqa/no_choice={'accuracy': 0.6708378672470077} |
|
GLUE/cola={'matthews_correlation': 0.018148342420931135} |
|
GLUE/mnli_matched={'accuracy': 0.6188487009679062} |
|
GLUE/mnli_mismatched={'accuracy': 0.6425956061838893} |
|
GLUE/mrpc={'accuracy': 0.5122549019607843, 'f1': 0.48311688311688306} |
|
GLUE/qnli={'accuracy': 0.7206663005674537} |
|
GLUE/qqp={'accuracy': 0.7263665594855305, 'f1': 0.46589098633708304} |
|
GLUE/rte={'accuracy': 0.6389891696750902} |
|
GLUE/sst2={'accuracy': 0.8532110091743119} |
|
GLUE/stsb={'pearson': 0.7411057905516302, 'spearmanr': 0.7461350353633797} |
|
GLUE/wnli={'accuracy': 0.4507042253521127} |
|
race/middle={'accuracy': 0.3050139275766017} |
|
race/high={'accuracy': 0.2984642132715155} |
|
|
|
Step=1536 |
|
Train={'loss': 0.22321232529452573, 'acc': 0.9419659655977739} |
|
Hellaswag/choice={'accuracy': 0.26687910774746065} |
|
Hellaswag/no_choice={'accuracy': 0.40599482174865564} |
|
obqa/main={'accuracy': 0.41} |
|
winogrande/no_choice={'accuracy': 0.5374901341752171} |
|
arc/ARC-Challenge={'accuracy': 0.34448160535117056} |
|
arc/ARC-Easy={'accuracy': 0.38421052631578945} |
|
super_glue/boolq={'accuracy': 0.7318042813455657} |
|
piqa/no_choice={'accuracy': 0.6887921653971708} |
|
GLUE/cola={'matthews_correlation': 0.0592680243795702} |
|
GLUE/mnli_matched={'accuracy': 0.692002037697402} |
|
GLUE/mnli_mismatched={'accuracy': 0.7192839707078926} |
|
GLUE/mrpc={'accuracy': 0.75, 'f1': 0.8152173913043477} |
|
GLUE/qnli={'accuracy': 0.7305509793153945} |
|
GLUE/qqp={'accuracy': 0.7809794706900817, 'f1': 0.7399641734942589} |
|
GLUE/rte={'accuracy': 0.7292418772563177} |
|
GLUE/sst2={'accuracy': 0.8956422018348624} |
|
GLUE/stsb={'pearson': 0.8148224387760193, 'spearmanr': 0.8217037925795925} |
|
GLUE/wnli={'accuracy': 0.4507042253521127} |
|
race/middle={'accuracy': 0.44846796657381616} |
|
race/high={'accuracy': 0.3790205737467401} |
|
|
|
Step=2048 |
|
Train={'loss': 0.21945939150191407, 'acc': 0.9429137157858349} |
|
Hellaswag/choice={'accuracy': 0.31069508066122287} |
|
Hellaswag/no_choice={'accuracy': 0.41147181836287594} |
|
obqa/main={'accuracy': 0.458} |
|
winogrande/no_choice={'accuracy': 0.5501183898973955} |
|
arc/ARC-Challenge={'accuracy': 0.33444816053511706} |
|
arc/ARC-Easy={'accuracy': 0.43859649122807015} |
|
super_glue/boolq={'accuracy': 0.7168195718654434} |
|
piqa/no_choice={'accuracy': 0.6985854189336235} |
|
GLUE/cola={'matthews_correlation': 0.16848752002152778} |
|
GLUE/mnli_matched={'accuracy': 0.7270504330106979} |
|
GLUE/mnli_mismatched={'accuracy': 0.7459316517493898} |
|
GLUE/mrpc={'accuracy': 0.6740196078431373, 'f1': 0.7200000000000001} |
|
GLUE/qnli={'accuracy': 0.7700896943071572} |
|
GLUE/qqp={'accuracy': 0.7724214692060352, 'f1': 0.6149725907017618} |
|
GLUE/rte={'accuracy': 0.7148014440433214} |
|
GLUE/sst2={'accuracy': 0.9025229357798165} |
|
GLUE/stsb={'pearson': 0.8246185257017313, 'spearmanr': 0.8350095448603486} |
|
GLUE/wnli={'accuracy': 0.4507042253521127} |
|
race/middle={'accuracy': 0.5125348189415042} |
|
race/high={'accuracy': 0.47029846421327154} |
|
|
|
Step=2560 |
|
Train={'loss': 0.21488739675896795, 'acc': 0.9439892922091531} |
|
Hellaswag/choice={'accuracy': 0.3314080860386377} |
|
Hellaswag/no_choice={'accuracy': 0.41724756024696275} |
|
obqa/main={'accuracy': 0.438} |
|
winogrande/no_choice={'accuracy': 0.5588003157063931} |
|
arc/ARC-Challenge={'accuracy': 0.36789297658862874} |
|
arc/ARC-Easy={'accuracy': 0.45263157894736844} |
|
super_glue/boolq={'accuracy': 0.7394495412844037} |
|
piqa/no_choice={'accuracy': 0.691512513601741} |
|
GLUE/cola={'matthews_correlation': 0.13283318224051427} |
|
GLUE/mnli_matched={'accuracy': 0.6886398369842078} |
|
GLUE/mnli_mismatched={'accuracy': 0.7149104963384866} |
|
GLUE/mrpc={'accuracy': 0.7279411764705882, 'f1': 0.7948243992606283} |
|
GLUE/qnli={'accuracy': 0.7918725974739155} |
|
GLUE/qqp={'accuracy': 0.7937175364828098, 'f1': 0.7043391945547363} |
|
GLUE/rte={'accuracy': 0.7256317689530686} |
|
GLUE/sst2={'accuracy': 0.9071100917431193} |
|
GLUE/stsb={'pearson': 0.8263292509858257, 'spearmanr': 0.8273984673778166} |
|
GLUE/wnli={'accuracy': 0.4225352112676056} |
|
race/middle={'accuracy': 0.532033426183844} |
|
race/high={'accuracy': 0.48246884960880904} |
|
|
|
Step=3072 |
|
Train={'loss': 0.21078881736502808, 'acc': 0.9449855978600681} |
|
Hellaswag/choice={'accuracy': 0.36586337382991435} |
|
Hellaswag/no_choice={'accuracy': 0.41884086835291773} |
|
obqa/main={'accuracy': 0.434} |
|
winogrande/no_choice={'accuracy': 0.5509076558800315} |
|
arc/ARC-Challenge={'accuracy': 0.38461538461538464} |
|
arc/ARC-Easy={'accuracy': 0.4473684210526316} |
|
super_glue/boolq={'accuracy': 0.7593272171253823} |
|
piqa/no_choice={'accuracy': 0.6893362350380848} |
|
GLUE/cola={'matthews_correlation': 0.10831649799274126} |
|
GLUE/mnli_matched={'accuracy': 0.7113601630157922} |
|
GLUE/mnli_mismatched={'accuracy': 0.726606997558991} |
|
GLUE/mrpc={'accuracy': 0.7230392156862745, 'f1': 0.7871939736346516} |
|
GLUE/qnli={'accuracy': 0.8142046494600037} |
|
GLUE/qqp={'accuracy': 0.8000247341083354, 'f1': 0.7119803355776424} |
|
GLUE/rte={'accuracy': 0.7256317689530686} |
|
GLUE/sst2={'accuracy': 0.9025229357798165} |
|
GLUE/stsb={'pearson': 0.82582592808152, 'spearmanr': 0.8296324719979368} |
|
GLUE/wnli={'accuracy': 0.43661971830985913} |
|
race/middle={'accuracy': 0.5466573816155988} |
|
race/high={'accuracy': 0.5123152709359606} |
|
|
|
Step=3584 |
|
Train={'loss': 0.20607954572187737, 'acc': 0.9461158117628656} |
|
Hellaswag/choice={'accuracy': 0.3822943636725752} |
|
Hellaswag/no_choice={'accuracy': 0.4244174467237602} |
|
obqa/main={'accuracy': 0.45} |
|
winogrande/no_choice={'accuracy': 0.5453827940015785} |
|
arc/ARC-Challenge={'accuracy': 0.36789297658862874} |
|
arc/ARC-Easy={'accuracy': 0.45263157894736844} |
|
super_glue/boolq={'accuracy': 0.7623853211009174} |
|
piqa/no_choice={'accuracy': 0.6871599564744287} |
|
GLUE/cola={'matthews_correlation': 0.21406100184418933} |
|
GLUE/mnli_matched={'accuracy': 0.7315333672949567} |
|
GLUE/mnli_mismatched={'accuracy': 0.7505085435313262} |
|
GLUE/mrpc={'accuracy': 0.7156862745098039, 'f1': 0.7827715355805244} |
|
GLUE/qnli={'accuracy': 0.8222588321435109} |
|
GLUE/qqp={'accuracy': 0.8101162503091763, 'f1': 0.7367374232708069} |
|
GLUE/rte={'accuracy': 0.7364620938628159} |
|
GLUE/sst2={'accuracy': 0.8990825688073395} |
|
GLUE/stsb={'pearson': 0.8320267135296606, 'spearmanr': 0.8362641064475601} |
|
GLUE/wnli={'accuracy': 0.43661971830985913} |
|
race/middle={'accuracy': 0.5675487465181058} |
|
race/high={'accuracy': 0.5224572587655752} |
|
|
|
Step=4096 |
|
Train={'loss': 0.20388431550509267, 'acc': 0.9466150429943809} |
|
Hellaswag/choice={'accuracy': 0.38129854610635333} |
|
Hellaswag/no_choice={'accuracy': 0.42561242780322645} |
|
obqa/main={'accuracy': 0.444} |
|
winogrande/no_choice={'accuracy': 0.5430149960536701} |
|
arc/ARC-Challenge={'accuracy': 0.3712374581939799} |
|
arc/ARC-Easy={'accuracy': 0.443859649122807} |
|
super_glue/boolq={'accuracy': 0.7611620795107034} |
|
piqa/no_choice={'accuracy': 0.6958650707290533} |
|
GLUE/cola={'matthews_correlation': 0.21838662331923692} |
|
GLUE/mnli_matched={'accuracy': 0.7395822720326032} |
|
GLUE/mnli_mismatched={'accuracy': 0.7588486574450773} |
|
GLUE/mrpc={'accuracy': 0.7279411764705882, 'f1': 0.7819253438113949} |
|
GLUE/qnli={'accuracy': 0.8215266337177375} |
|
GLUE/qqp={'accuracy': 0.8023992085085333, 'f1': 0.7011335153941117} |
|
GLUE/rte={'accuracy': 0.7436823104693141} |
|
GLUE/sst2={'accuracy': 0.9071100917431193} |
|
GLUE/stsb={'pearson': 0.832917529289115, 'spearmanr': 0.8366141733875496} |
|
GLUE/wnli={'accuracy': 0.43661971830985913} |
|
race/middle={'accuracy': 0.5682451253481894} |
|
race/high={'accuracy': 0.5169516082294987} |
|
|
|
Step=4608 |
|
Train={'loss': 0.20215968282400354, 'acc': 0.9470157118194038} |
|
Hellaswag/choice={'accuracy': 0.39603664608643696} |
|
Hellaswag/no_choice={'accuracy': 0.4281019717187811} |
|
obqa/main={'accuracy': 0.43} |
|
winogrande/no_choice={'accuracy': 0.5509076558800315} |
|
arc/ARC-Challenge={'accuracy': 0.38127090301003347} |
|
arc/ARC-Easy={'accuracy': 0.4614035087719298} |
|
super_glue/boolq={'accuracy': 0.7605504587155963} |
|
piqa/no_choice={'accuracy': 0.6964091403699674} |
|
GLUE/cola={'matthews_correlation': 0.2308810545585645} |
|
GLUE/mnli_matched={'accuracy': 0.7256240448293428} |
|
GLUE/mnli_mismatched={'accuracy': 0.741253051261188} |
|
GLUE/mrpc={'accuracy': 0.7426470588235294, 'f1': 0.8} |
|
GLUE/qnli={'accuracy': 0.8218927329306241} |
|
GLUE/qqp={'accuracy': 0.7993569131832797, 'f1': 0.693701857725419} |
|
GLUE/rte={'accuracy': 0.7184115523465704} |
|
GLUE/sst2={'accuracy': 0.8979357798165137} |
|
GLUE/stsb={'pearson': 0.8335554394972398, 'spearmanr': 0.8359066615989392} |
|
GLUE/wnli={'accuracy': 0.4225352112676056} |
|
race/middle={'accuracy': 0.5682451253481894} |
|
race/high={'accuracy': 0.5273833671399595} |
|
|
|
Step=5120 |
|
Train={'loss': 0.20209107419077554, 'acc': 0.9470347743335878} |
|
Hellaswag/choice={'accuracy': 0.3852818163712408} |
|
Hellaswag/no_choice={'accuracy': 0.425911173073093} |
|
obqa/main={'accuracy': 0.44} |
|
winogrande/no_choice={'accuracy': 0.5469613259668509} |
|
arc/ARC-Challenge={'accuracy': 0.4013377926421405} |
|
arc/ARC-Easy={'accuracy': 0.4649122807017544} |
|
super_glue/boolq={'accuracy': 0.7636085626911315} |
|
piqa/no_choice={'accuracy': 0.6958650707290533} |
|
GLUE/cola={'matthews_correlation': 0.23868889984712757} |
|
GLUE/mnli_matched={'accuracy': 0.745491594498217} |
|
GLUE/mnli_mismatched={'accuracy': 0.761899918633035} |
|
GLUE/mrpc={'accuracy': 0.7352941176470589, 'f1': 0.8014705882352942} |
|
GLUE/qnli={'accuracy': 0.8176825919824272} |
|
GLUE/qqp={'accuracy': 0.8082364580756863, 'f1': 0.737284402426214} |
|
GLUE/rte={'accuracy': 0.7111913357400722} |
|
GLUE/sst2={'accuracy': 0.8979357798165137} |
|
GLUE/stsb={'pearson': 0.8370202575390767, 'spearmanr': 0.8390825240948236} |
|
GLUE/wnli={'accuracy': 0.4647887323943662} |
|
race/middle={'accuracy': 0.5731197771587744} |
|
race/high={'accuracy': 0.530860620110113} |
|
|
|
Step=5632 |
|
Train={'loss': 0.2028822331549236, 'acc': 0.9469025877478998} |
|
Hellaswag/choice={'accuracy': 0.38966341366261703} |
|
Hellaswag/no_choice={'accuracy': 0.425911173073093} |
|
obqa/main={'accuracy': 0.438} |
|
winogrande/no_choice={'accuracy': 0.5516969218626677} |
|
arc/ARC-Challenge={'accuracy': 0.39464882943143814} |
|
arc/ARC-Easy={'accuracy': 0.4614035087719298} |
|
super_glue/boolq={'accuracy': 0.7617737003058104} |
|
piqa/no_choice={'accuracy': 0.6936887921653971} |
|
GLUE/cola={'matthews_correlation': 0.24499744431737797} |
|
GLUE/mnli_matched={'accuracy': 0.7410086602139583} |
|
GLUE/mnli_mismatched={'accuracy': 0.7594589096826688} |
|
GLUE/mrpc={'accuracy': 0.7352941176470589, 'f1': 0.8014705882352942} |
|
GLUE/qnli={'accuracy': 0.818597840014644} |
|
GLUE/qqp={'accuracy': 0.8090526836507544, 'f1': 0.7393124873370702} |
|
GLUE/rte={'accuracy': 0.7184115523465704} |
|
GLUE/sst2={'accuracy': 0.8990825688073395} |
|
GLUE/stsb={'pearson': 0.8406728176670001, 'spearmanr': 0.8427300768224941} |
|
GLUE/wnli={'accuracy': 0.43661971830985913} |
|
race/middle={'accuracy': 0.5738161559888579} |
|
race/high={'accuracy': 0.5299913068675746} |
|
|
|
Step=6144 |
|
Train={'loss': 0.20155814645704595, 'acc': 0.9471458137704758} |
|
Hellaswag/choice={'accuracy': 0.3899621589324836} |
|
Hellaswag/no_choice={'accuracy': 0.4255128460466043} |
|
obqa/main={'accuracy': 0.438} |
|
winogrande/no_choice={'accuracy': 0.5501183898973955} |
|
arc/ARC-Challenge={'accuracy': 0.391304347826087} |
|
arc/ARC-Easy={'accuracy': 0.45964912280701753} |
|
super_glue/boolq={'accuracy': 0.7620795107033639} |
|
piqa/no_choice={'accuracy': 0.6931447225244831} |
|
GLUE/cola={'matthews_correlation': 0.24066818407079058} |
|
GLUE/mnli_matched={'accuracy': 0.741110545084055} |
|
GLUE/mnli_mismatched={'accuracy': 0.7594589096826688} |
|
GLUE/mrpc={'accuracy': 0.7352941176470589, 'f1': 0.8014705882352942} |
|
GLUE/qnli={'accuracy': 0.818597840014644} |
|
GLUE/qqp={'accuracy': 0.8092010883007668, 'f1': 0.7396030245746693} |
|
GLUE/rte={'accuracy': 0.7148014440433214} |
|
GLUE/sst2={'accuracy': 0.8990825688073395} |
|
GLUE/stsb={'pearson': 0.8406896598512256, 'spearmanr': 0.8428307063668854} |
|
GLUE/wnli={'accuracy': 0.43661971830985913} |
|
race/middle={'accuracy': 0.5745125348189415} |
|
race/high={'accuracy': 0.5282526803824978} |
|
|
|
|