ardaspear committed (verified)
Commit c124e62 · Parent(s): fe044a0

Training in progress, step 204, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c925b87b8d7870ce14fd59fe02160bc0a9b8db123ed15f75cea43d25afd664bb
+oid sha256:3b11ec8052c5552f688ed9d416868cd53ddb56b5d87ead5a1ac28143162119f3
 size 72396376
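
For reference, adapter_model.safetensors carries the updated LoRA adapter weights (about 72 MB). A minimal sketch for inspecting the saved tensors once the LFS object has been pulled locally, assuming the safetensors and torch packages are installed; the path simply mirrors the checkpoint layout in this commit:

# Inspect the adapter checkpoint; requires `pip install safetensors torch`
# and a real LFS object on disk (not just the pointer stub).
from safetensors.torch import load_file

state_dict = load_file("last-checkpoint/adapter_model.safetensors")
for name, tensor in list(state_dict.items())[:5]:
    print(name, tuple(tensor.shape), tensor.dtype)
print(f"{len(state_dict)} tensors total")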
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:50f8022e28ae5b2ba03c1fd7bc121b9ea02cb3e858280eee3e2ab2462cc80c3d
+oid sha256:c07cb456027b1e8133d5349eb14073d4e3f3071ed39e24a44c43b361c9a7e331
 size 37134420
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8cfbc8e54b819f718872be40a285c49725dec3a1d4f07ec32ca0a5444a91520
+oid sha256:8513bc15602cc9778e1a951f5ade81824ac1d664cd10d25d7f7f817465e8501d
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f21ce5519aba36efeb75a8dad39ab6bd85bd42d0ae24cbc1f5cfa5d96741b8bc
+oid sha256:5f9839d107756d9c8815de9164f2ebf92c05b3536704a349ca5892084df7663e
 size 1064
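
Each of the files above is tracked by Git LFS, so the diff only touches the three-line pointer stub: the spec version, the SHA-256 of the real object (oid), and its size in bytes. A hedged, standard-library-only sketch for checking that a pulled object matches its pointer; the path and expected values are copied from the scheduler.pt entry above:

# Verify a pulled LFS object against the oid/size recorded in this diff.
import hashlib, os

path = "last-checkpoint/scheduler.pt"  # illustrative, taken from this commit
expected_oid = "5f9839d107756d9c8815de9164f2ebf92c05b3536704a349ca5892084df7663e"
expected_size = 1064

digest = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        digest.update(chunk)

assert os.path.getsize(path) == expected_size, "size mismatch"
assert digest.hexdigest() == expected_oid, "oid mismatch"
print("pointer and object agree")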
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.048336650554449814,
+  "epoch": 0.05800398066533978,
   "eval_steps": 34,
-  "global_step": 170,
+  "global_step": 204,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -447,6 +447,98 @@
       "eval_samples_per_second": 35.251,
       "eval_steps_per_second": 4.409,
       "step": 170
+    },
+    {
+      "epoch": 0.048620983793005404,
+      "grad_norm": 0.4114479720592499,
+      "learning_rate": 3.17617799075421e-05,
+      "loss": 0.6287,
+      "step": 171
+    },
+    {
+      "epoch": 0.049473983508672166,
+      "grad_norm": 0.36733385920524597,
+      "learning_rate": 3.1178227669141744e-05,
+      "loss": 0.621,
+      "step": 174
+    },
+    {
+      "epoch": 0.05032698322433893,
+      "grad_norm": 0.3716995418071747,
+      "learning_rate": 3.0591067519763895e-05,
+      "loss": 0.5532,
+      "step": 177
+    },
+    {
+      "epoch": 0.05117998294000568,
+      "grad_norm": 0.4997945725917816,
+      "learning_rate": 3.0000642344401113e-05,
+      "loss": 0.6024,
+      "step": 180
+    },
+    {
+      "epoch": 0.052032982655672445,
+      "grad_norm": 0.43555817008018494,
+      "learning_rate": 2.9407296934729227e-05,
+      "loss": 0.5092,
+      "step": 183
+    },
+    {
+      "epoch": 0.05288598237133921,
+      "grad_norm": 0.49766504764556885,
+      "learning_rate": 2.8811377787758636e-05,
+      "loss": 0.6556,
+      "step": 186
+    },
+    {
+      "epoch": 0.05373898208700597,
+      "grad_norm": 0.6171467304229736,
+      "learning_rate": 2.8213232903489865e-05,
+      "loss": 0.5736,
+      "step": 189
+    },
+    {
+      "epoch": 0.05459198180267273,
+      "grad_norm": 0.8651450276374817,
+      "learning_rate": 2.761321158169134e-05,
+      "loss": 0.5568,
+      "step": 192
+    },
+    {
+      "epoch": 0.055444981518339494,
+      "grad_norm": 0.4486936926841736,
+      "learning_rate": 2.7011664217918154e-05,
+      "loss": 0.6145,
+      "step": 195
+    },
+    {
+      "epoch": 0.056297981234006256,
+      "grad_norm": 0.5348999500274658,
+      "learning_rate": 2.6408942098890936e-05,
+      "loss": 0.591,
+      "step": 198
+    },
+    {
+      "epoch": 0.05715098094967302,
+      "grad_norm": 0.4498997628688812,
+      "learning_rate": 2.580539719735433e-05,
+      "loss": 0.5572,
+      "step": 201
+    },
+    {
+      "epoch": 0.05800398066533978,
+      "grad_norm": 0.5493082404136658,
+      "learning_rate": 2.5201381966534748e-05,
+      "loss": 0.5316,
+      "step": 204
+    },
+    {
+      "epoch": 0.05800398066533978,
+      "eval_loss": 0.5341343879699707,
+      "eval_runtime": 167.9533,
+      "eval_samples_per_second": 35.272,
+      "eval_steps_per_second": 4.412,
+      "step": 204
     }
   ],
   "logging_steps": 3,
@@ -466,7 +558,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.444000299352064e+16,
+  "total_flos": 6.532800359222477e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null