leixa committed
Commit 565a550 · verified · 1 Parent(s): 0f1f022

Training in progress, step 400, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ce9aac62294993f36d80147fc9d17da17586d22e952c325c6af63259870c0543
+oid sha256:926cb90faeac72bc01bd16c9cfe6f27ba2999c8d2418ede55b2ab0f6bb924251
 size 17425352
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8a16b5b17c9db309d8fc8f984b1e1857d49df7dca79ddeb5b811408bb7bd47c8
+oid sha256:173e3b56481ac1faad65817a5c87d6290db52c6186ff8feed699c25e320e1b5e
 size 10252116
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0790c1a4e674a9ced8e4ba06bf81e8ca6c7f6cc25e0bc865ffc27c524e8bbe92
+oid sha256:21b0ec3ee9533871c215e821b6c863f82727baf80e99d0e9076878ab947252c7
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:43da0688aca60835f4e18fa7e0f3cc099504828f82fd5dd994118be26b760a0f
+oid sha256:705cabf5cbc3a6ab0feb67c77b9b453d59efcc939ce90d310af96e621810f990
 size 1064
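
The four binaries above are stored as Git LFS pointer files: each pointer records only the spec version, the sha256 oid of the object, and its byte size, and this commit swaps the oids while the sizes stay the same. Below is a minimal sketch of checking pulled files against the new pointers; it assumes the repository has been cloned with the LFS objects fetched (e.g. via git lfs pull), and the paths and expected values are copied from the diffs above.

import hashlib
from pathlib import Path

# Expected sha256 oid and byte size, copied from the updated LFS pointers above.
EXPECTED = {
    "last-checkpoint/adapter_model.safetensors":
        ("926cb90faeac72bc01bd16c9cfe6f27ba2999c8d2418ede55b2ab0f6bb924251", 17425352),
    "last-checkpoint/optimizer.pt":
        ("173e3b56481ac1faad65817a5c87d6290db52c6186ff8feed699c25e320e1b5e", 10252116),
    "last-checkpoint/rng_state.pth":
        ("21b0ec3ee9533871c215e821b6c863f82727baf80e99d0e9076878ab947252c7", 14244),
    "last-checkpoint/scheduler.pt":
        ("705cabf5cbc3a6ab0feb67c77b9b453d59efcc939ce90d310af96e621810f990", 1064),
}

for rel_path, (oid, size) in EXPECTED.items():
    data = Path(rel_path).read_bytes()
    ok = hashlib.sha256(data).hexdigest() == oid and len(data) == size
    print(f"{rel_path}: {'OK' if ok else 'mismatch (LFS object not pulled?)'}")
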
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.13496794511303564,
+  "epoch": 0.1799572601507142,
   "eval_steps": 100,
-  "global_step": 300,
+  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -459,6 +459,154 @@
       "eval_samples_per_second": 33.549,
       "eval_steps_per_second": 16.775,
       "step": 300
+    },
+    {
+      "epoch": 0.1372174108649196,
+      "grad_norm": 0.1710512787103653,
+      "learning_rate": 1.3939877632809278e-05,
+      "loss": 1.7437,
+      "step": 305
+    },
+    {
+      "epoch": 0.13946687661680351,
+      "grad_norm": 0.1375483125448227,
+      "learning_rate": 1.257446259144494e-05,
+      "loss": 1.7683,
+      "step": 310
+    },
+    {
+      "epoch": 0.14171634236868744,
+      "grad_norm": 0.15804438292980194,
+      "learning_rate": 1.1269751908617277e-05,
+      "loss": 1.6869,
+      "step": 315
+    },
+    {
+      "epoch": 0.14396580812057136,
+      "grad_norm": 0.17680715024471283,
+      "learning_rate": 1.0027861829824952e-05,
+      "loss": 1.6359,
+      "step": 320
+    },
+    {
+      "epoch": 0.14621527387245528,
+      "grad_norm": 0.13866226375102997,
+      "learning_rate": 8.850806705317183e-06,
+      "loss": 1.7821,
+      "step": 325
+    },
+    {
+      "epoch": 0.14846473962433923,
+      "grad_norm": 0.1694657802581787,
+      "learning_rate": 7.740495722810271e-06,
+      "loss": 1.502,
+      "step": 330
+    },
+    {
+      "epoch": 0.15071420537622315,
+      "grad_norm": 0.16602936387062073,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 1.6365,
+      "step": 335
+    },
+    {
+      "epoch": 0.15296367112810708,
+      "grad_norm": 0.21175408363342285,
+      "learning_rate": 5.727198717339511e-06,
+      "loss": 1.9111,
+      "step": 340
+    },
+    {
+      "epoch": 0.155213136879991,
+      "grad_norm": 0.18618881702423096,
+      "learning_rate": 4.827478269480895e-06,
+      "loss": 1.7057,
+      "step": 345
+    },
+    {
+      "epoch": 0.15746260263187492,
+      "grad_norm": 0.19256582856178284,
+      "learning_rate": 4.001027817058789e-06,
+      "loss": 1.7551,
+      "step": 350
+    },
+    {
+      "epoch": 0.15971206838375887,
+      "grad_norm": 0.1473885327577591,
+      "learning_rate": 3.249187865729264e-06,
+      "loss": 1.6551,
+      "step": 355
+    },
+    {
+      "epoch": 0.1619615341356428,
+      "grad_norm": 0.14483609795570374,
+      "learning_rate": 2.573177902642726e-06,
+      "loss": 1.5671,
+      "step": 360
+    },
+    {
+      "epoch": 0.16421099988752672,
+      "grad_norm": 0.17720504105091095,
+      "learning_rate": 1.974094418431388e-06,
+      "loss": 1.5816,
+      "step": 365
+    },
+    {
+      "epoch": 0.16646046563941064,
+      "grad_norm": 0.1897529512643814,
+      "learning_rate": 1.4529091286973995e-06,
+      "loss": 1.5579,
+      "step": 370
+    },
+    {
+      "epoch": 0.16870993139129456,
+      "grad_norm": 0.12367403507232666,
+      "learning_rate": 1.0104673978866164e-06,
+      "loss": 1.4958,
+      "step": 375
+    },
+    {
+      "epoch": 0.17095939714317848,
+      "grad_norm": 0.11187849193811417,
+      "learning_rate": 6.474868681043578e-07,
+      "loss": 1.6368,
+      "step": 380
+    },
+    {
+      "epoch": 0.17320886289506243,
+      "grad_norm": 0.12326706200838089,
+      "learning_rate": 3.6455629509730136e-07,
+      "loss": 1.7971,
+      "step": 385
+    },
+    {
+      "epoch": 0.17545832864694635,
+      "grad_norm": 0.11397775262594223,
+      "learning_rate": 1.6213459328950352e-07,
+      "loss": 1.6835,
+      "step": 390
+    },
+    {
+      "epoch": 0.17770779439883028,
+      "grad_norm": 0.12962651252746582,
+      "learning_rate": 4.055009142152067e-08,
+      "loss": 1.7831,
+      "step": 395
+    },
+    {
+      "epoch": 0.1799572601507142,
+      "grad_norm": 0.19526903331279755,
+      "learning_rate": 0.0,
+      "loss": 1.6751,
+      "step": 400
+    },
+    {
+      "epoch": 0.1799572601507142,
+      "eval_loss": 1.7608678340911865,
+      "eval_runtime": 27.7885,
+      "eval_samples_per_second": 33.683,
+      "eval_steps_per_second": 16.841,
+      "step": 400
     }
   ],
   "logging_steps": 5,
@@ -473,12 +621,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 5742197715763200.0,
+  "total_flos": 7619680791429120.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null