farmery committed on
Commit 519290f · verified · 1 Parent(s): 4f16f4f

Training in progress, step 64, checkpoint
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:805896509ff9103abfa0a4a966d5713a4bd1c8741b438690f08f8ff9bcf9a8c5
+oid sha256:0c35c9188534dff696e2c7c8d14e4c793f0241bf24c9eb49e50ebd0c8b2fcec5
 size 167832240
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:504f644ebf18fb20cca271336024c1112033cc0eaa4cc55a1f807f81927c547f
+oid sha256:60522b1def48b724a63c8ba611c3b3877737c88038937468179f77ef563f0056
 size 335922386
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2c011fb688865d21f9702ea14ccc107031312eead0eacc5dd98d0ac2ef6bc99b
+oid sha256:ec27a7948b1af9e386642f089cbf00d030ac5076d348d5d6cf62fd721edacb89
 size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a4d54e9fc5ebf69f3767b40c296d70c16d79ad04acb8328b23771380f1b5cd4d
+oid sha256:a74c2697a5d2f34c7cd0889810082a6eb359f0cfe7581cc22d067d10bb5705e0
 size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a7099c77b3a457f97f7c88b43d981367cdc640db4e9df385fd22eb51a05aec7c
+oid sha256:c0e0a7bbf100aa101bfb5a43ee6254102010ca2cbe8f455012b88d1102b67cdc
 size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8a997b593ce85261f4c7fc16a49d26b47625891bf34dd47c65e1d0b386368723
+oid sha256:0775547a212ad9530d249fd95fad7940c94e5932139984768d41bb9352e5a7f4
 size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2c4e947a403026a15276d2767326d990009a35d8fce1c5e4312326f271ea16f0
+oid sha256:59692df4f58e2dc39b05fe970cb9ef53e14c5bd82e536b0d52f878980bedcb4a
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.3255813953488373,
+  "epoch": 2.9767441860465116,
   "eval_steps": 6,
-  "global_step": 50,
+  "global_step": 64,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -429,6 +429,120 @@
       "learning_rate": 1.5687918106563326e-05,
       "loss": 3.7934,
       "step": 50
+    },
+    {
+      "epoch": 2.3720930232558137,
+      "grad_norm": 1.7052773237228394,
+      "learning_rate": 1.3631317921347563e-05,
+      "loss": 3.7972,
+      "step": 51
+    },
+    {
+      "epoch": 2.4186046511627906,
+      "grad_norm": 2.181281089782715,
+      "learning_rate": 1.1697777844051105e-05,
+      "loss": 3.8197,
+      "step": 52
+    },
+    {
+      "epoch": 2.4651162790697674,
+      "grad_norm": 2.764360189437866,
+      "learning_rate": 9.893840362247809e-06,
+      "loss": 3.8114,
+      "step": 53
+    },
+    {
+      "epoch": 2.511627906976744,
+      "grad_norm": 2.347971200942993,
+      "learning_rate": 8.225609429353187e-06,
+      "loss": 3.8165,
+      "step": 54
+    },
+    {
+      "epoch": 2.511627906976744,
+      "eval_loss": 3.8157098293304443,
+      "eval_runtime": 1.3802,
+      "eval_samples_per_second": 51.442,
+      "eval_steps_per_second": 2.174,
+      "step": 54
+    },
+    {
+      "epoch": 2.558139534883721,
+      "grad_norm": 2.3315491676330566,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 3.7953,
+      "step": 55
+    },
+    {
+      "epoch": 2.604651162790698,
+      "grad_norm": 2.439504384994507,
+      "learning_rate": 5.318367983829392e-06,
+      "loss": 3.8505,
+      "step": 56
+    },
+    {
+      "epoch": 2.6511627906976747,
+      "grad_norm": 1.8666633367538452,
+      "learning_rate": 4.089194655986306e-06,
+      "loss": 3.8066,
+      "step": 57
+    },
+    {
+      "epoch": 2.697674418604651,
+      "grad_norm": 1.904817819595337,
+      "learning_rate": 3.0153689607045845e-06,
+      "loss": 3.8182,
+      "step": 58
+    },
+    {
+      "epoch": 2.744186046511628,
+      "grad_norm": 3.3425002098083496,
+      "learning_rate": 2.100524384225555e-06,
+      "loss": 3.7978,
+      "step": 59
+    },
+    {
+      "epoch": 2.7906976744186047,
+      "grad_norm": 1.6687263250350952,
+      "learning_rate": 1.3477564710088098e-06,
+      "loss": 3.8162,
+      "step": 60
+    },
+    {
+      "epoch": 2.7906976744186047,
+      "eval_loss": 3.805279016494751,
+      "eval_runtime": 1.3795,
+      "eval_samples_per_second": 51.467,
+      "eval_steps_per_second": 2.175,
+      "step": 60
+    },
+    {
+      "epoch": 2.8372093023255816,
+      "grad_norm": 2.4629361629486084,
+      "learning_rate": 7.596123493895991e-07,
+      "loss": 3.7868,
+      "step": 61
+    },
+    {
+      "epoch": 2.883720930232558,
+      "grad_norm": 3.152594804763794,
+      "learning_rate": 3.380821129028489e-07,
+      "loss": 3.8117,
+      "step": 62
+    },
+    {
+      "epoch": 2.9302325581395348,
+      "grad_norm": 1.8460975885391235,
+      "learning_rate": 8.459208643659122e-08,
+      "loss": 3.8007,
+      "step": 63
+    },
+    {
+      "epoch": 2.9767441860465116,
+      "grad_norm": 3.2109551429748535,
+      "learning_rate": 0.0,
+      "loss": 3.8177,
+      "step": 64
     }
   ],
   "logging_steps": 1,
@@ -443,12 +557,12 @@
     "should_evaluate": false,
     "should_log": false,
     "should_save": true,
-    "should_training_stop": false
+    "should_training_stop": true
   },
   "attributes": {}
   }
 },
-  "total_flos": 1.483774567120896e+17,
+  "total_flos": 1.899231445914747e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null