farmery commited on
Commit
73527b8
·
verified ·
1 Parent(s): 899cbee

Training in progress, step 400, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:917b9908d6ddf96c924657a17ab9a77947c3dee4187dc99733d028b7da01a370
3
  size 17425352
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cf81c5e97ca4a065c0ca02c64490fbed69ea1e0e634764c39ac360fb3de23f6
3
  size 17425352
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:63574ea2b3926287ca101305d0da3612adfdea5a67b7f1e2bae3ca77824eb182
3
  size 10252116
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:984b56c48c9023638f9dc3adc119ed4e91a43dba7303ad022093e52653b71381
3
  size 10252116
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:49c3113ab5e74cf2cf75b8272697182349b986f05fea887c23c6b1cf30a10723
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbfbf1d0b7e0c82bee0aed83acaa37307cfaf516b484db2905492adc52f49ba9
3
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:948034a5628471bb3c37b4e388f6a1656b75ee096cbb96c84fa1a5c61b6c63e2
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40ef8a2c12734f9f93f5fc7d12d061c1cc3193923b0707973cc8a18e1aa21c9c
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e03f9a24a7a90769559ab2186e52e4bea52a0ca73705b5babf1706779cbc29f3
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c57e242258a295316541298a461d1944eecf22f51f4004aa98733dd4c28ccbc6
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:36ecb369e890de96dc252a97d723cd39a5de3933ea7d729b6a84c9264b7d8b05
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:432e714a282ec27bfff2b100b31fd4b7108bbf4d435b0fa544488631f793c049
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43da0688aca60835f4e18fa7e0f3cc099504828f82fd5dd994118be26b760a0f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:705cabf5cbc3a6ab0feb67c77b9b453d59efcc939ce90d310af96e621810f990
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.07801833430856252,
5
  "eval_steps": 100,
6
- "global_step": 300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -459,6 +459,154 @@
459
  "eval_samples_per_second": 140.952,
460
  "eval_steps_per_second": 17.627,
461
  "step": 300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
462
  }
463
  ],
464
  "logging_steps": 5,
@@ -473,12 +621,12 @@
473
  "should_evaluate": false,
474
  "should_log": false,
475
  "should_save": true,
476
- "should_training_stop": false
477
  },
478
  "attributes": {}
479
  }
480
  },
481
- "total_flos": 1.905704121348915e+16,
482
  "train_batch_size": 2,
483
  "trial_name": null,
484
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.10402444574475002,
5
  "eval_steps": 100,
6
+ "global_step": 400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
459
  "eval_samples_per_second": 140.952,
460
  "eval_steps_per_second": 17.627,
461
  "step": 300
462
+ },
463
+ {
464
+ "epoch": 0.07931863988037188,
465
+ "grad_norm": 0.22041112184524536,
466
+ "learning_rate": 1.3939877632809278e-05,
467
+ "loss": 2.406,
468
+ "step": 305
469
+ },
470
+ {
471
+ "epoch": 0.08061894545218126,
472
+ "grad_norm": 0.2416123002767563,
473
+ "learning_rate": 1.257446259144494e-05,
474
+ "loss": 2.3487,
475
+ "step": 310
476
+ },
477
+ {
478
+ "epoch": 0.08191925102399064,
479
+ "grad_norm": 0.21595120429992676,
480
+ "learning_rate": 1.1269751908617277e-05,
481
+ "loss": 2.3856,
482
+ "step": 315
483
+ },
484
+ {
485
+ "epoch": 0.0832195565958,
486
+ "grad_norm": 0.21398255228996277,
487
+ "learning_rate": 1.0027861829824952e-05,
488
+ "loss": 2.4034,
489
+ "step": 320
490
+ },
491
+ {
492
+ "epoch": 0.08451986216760939,
493
+ "grad_norm": 0.2343326061964035,
494
+ "learning_rate": 8.850806705317183e-06,
495
+ "loss": 2.3638,
496
+ "step": 325
497
+ },
498
+ {
499
+ "epoch": 0.08582016773941877,
500
+ "grad_norm": 0.23280276358127594,
501
+ "learning_rate": 7.740495722810271e-06,
502
+ "loss": 2.3899,
503
+ "step": 330
504
+ },
505
+ {
506
+ "epoch": 0.08712047331122814,
507
+ "grad_norm": 0.22366644442081451,
508
+ "learning_rate": 6.698729810778065e-06,
509
+ "loss": 2.4007,
510
+ "step": 335
511
+ },
512
+ {
513
+ "epoch": 0.08842077888303751,
514
+ "grad_norm": 0.25122907757759094,
515
+ "learning_rate": 5.727198717339511e-06,
516
+ "loss": 2.3721,
517
+ "step": 340
518
+ },
519
+ {
520
+ "epoch": 0.08972108445484689,
521
+ "grad_norm": 0.25753486156463623,
522
+ "learning_rate": 4.827478269480895e-06,
523
+ "loss": 2.3808,
524
+ "step": 345
525
+ },
526
+ {
527
+ "epoch": 0.09102139002665627,
528
+ "grad_norm": 0.22881199419498444,
529
+ "learning_rate": 4.001027817058789e-06,
530
+ "loss": 2.3813,
531
+ "step": 350
532
+ },
533
+ {
534
+ "epoch": 0.09232169559846563,
535
+ "grad_norm": 0.22728504240512848,
536
+ "learning_rate": 3.249187865729264e-06,
537
+ "loss": 2.3886,
538
+ "step": 355
539
+ },
540
+ {
541
+ "epoch": 0.09362200117027501,
542
+ "grad_norm": 0.2537117898464203,
543
+ "learning_rate": 2.573177902642726e-06,
544
+ "loss": 2.4104,
545
+ "step": 360
546
+ },
547
+ {
548
+ "epoch": 0.09492230674208439,
549
+ "grad_norm": 0.2203947901725769,
550
+ "learning_rate": 1.974094418431388e-06,
551
+ "loss": 2.3652,
552
+ "step": 365
553
+ },
554
+ {
555
+ "epoch": 0.09622261231389377,
556
+ "grad_norm": 0.24142761528491974,
557
+ "learning_rate": 1.4529091286973995e-06,
558
+ "loss": 2.4902,
559
+ "step": 370
560
+ },
561
+ {
562
+ "epoch": 0.09752291788570314,
563
+ "grad_norm": 0.2279200553894043,
564
+ "learning_rate": 1.0104673978866164e-06,
565
+ "loss": 2.4341,
566
+ "step": 375
567
+ },
568
+ {
569
+ "epoch": 0.09882322345751252,
570
+ "grad_norm": 0.22958961129188538,
571
+ "learning_rate": 6.474868681043578e-07,
572
+ "loss": 2.2886,
573
+ "step": 380
574
+ },
575
+ {
576
+ "epoch": 0.1001235290293219,
577
+ "grad_norm": 0.25041958689689636,
578
+ "learning_rate": 3.6455629509730136e-07,
579
+ "loss": 2.3789,
580
+ "step": 385
581
+ },
582
+ {
583
+ "epoch": 0.10142383460113126,
584
+ "grad_norm": 0.2322605699300766,
585
+ "learning_rate": 1.6213459328950352e-07,
586
+ "loss": 2.4435,
587
+ "step": 390
588
+ },
589
+ {
590
+ "epoch": 0.10272414017294064,
591
+ "grad_norm": 0.22026541829109192,
592
+ "learning_rate": 4.055009142152067e-08,
593
+ "loss": 2.3933,
594
+ "step": 395
595
+ },
596
+ {
597
+ "epoch": 0.10402444574475002,
598
+ "grad_norm": 0.22310110926628113,
599
+ "learning_rate": 0.0,
600
+ "loss": 2.3593,
601
+ "step": 400
602
+ },
603
+ {
604
+ "epoch": 0.10402444574475002,
605
+ "eval_loss": 2.4176995754241943,
606
+ "eval_runtime": 46.0549,
607
+ "eval_samples_per_second": 140.636,
608
+ "eval_steps_per_second": 17.588,
609
+ "step": 400
610
  }
611
  ],
612
  "logging_steps": 5,
 
621
  "should_evaluate": false,
622
  "should_log": false,
623
  "should_save": true,
624
+ "should_training_stop": true
625
  },
626
  "attributes": {}
627
  }
628
  },
629
+ "total_flos": 2.5375418471153664e+16,
630
  "train_batch_size": 2,
631
  "trial_name": null,
632
  "trial_params": null