iamnguyen committed (verified)
Commit 5175e1f · 1 Parent(s): 86a3597

Training in progress, step 10256, checkpoint

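This commit updates the five files that the transformers Trainer writes when it saves a mid-run checkpoint. As a hedged sketch (the repo id below is a placeholder and is not stated on this page; only the commit hash and file names come from this diff), the snapshot at this revision could be fetched with huggingface_hub:

```python
# Sketch only: pull the checkpoint files touched by this commit.
# REPO_ID is a placeholder -- it is not stated on this page; the commit hash
# (5175e1f) and the file names are taken from the diff below.
from huggingface_hub import hf_hub_download

REPO_ID = "iamnguyen/<model-repo>"  # placeholder, fill in the actual repo id

for name in ("model.safetensors", "optimizer.pt", "rng_state.pth",
             "scheduler.pt", "trainer_state.json"):
    local_path = hf_hub_download(
        repo_id=REPO_ID,
        filename=f"last-checkpoint/{name}",
        revision="5175e1f",  # this commit
    )
    print(local_path)
```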
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1f7653236f02b691ce82a2a43417c83d3377d27624301a0130d41eb494a00cac
+oid sha256:be8f064d453a57ea7273970350761d964c9807f1c078006d860d03a3b99aecd4
 size 903834408
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9a6bd0efd5540f662ed66fd619970292bdab172c0361f8d5b2102b5707a107f7
+oid sha256:12874bf81eb0524d19891bca2ddcc85a7634979c700dd291ce0217fc58255786
 size 1807824186
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:15c770040b9aeae6fdfeacf1de73c2a84a5bb50f203a810f2a9b404ddfe1daa6
+oid sha256:ed797abbb6e575252328647d2519975ca5213c166e33893bf2b6cbc2d2b0579d
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4a669e3de694f575fc5fe2b31bd21c056c9b69b5d3d6d1d403d2181bc0a961d8
+oid sha256:075aea1ae34d22fd6b751b1b0ff5783a336ded95b0d65058f0b4391e6f395a77
 size 1064
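Each of the four pointer files above only records a new SHA-256 object id; the payload sizes are unchanged. A minimal sketch for checking that a locally downloaded file matches the oid recorded in its pointer (the local path is an assumption, the expected hash is taken from the model.safetensors diff above):

```python
# Minimal sketch: verify a downloaded checkpoint file against the
# "oid sha256:..." value recorded in its Git LFS pointer.
import hashlib
from pathlib import Path

def sha256_of(path: Path, chunk_size: int = 1 << 20) -> str:
    """Hash the file in 1 MiB chunks so the ~900 MB safetensors fits in memory."""
    digest = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# New oid for last-checkpoint/model.safetensors from the diff above;
# the local path assumes the file has already been downloaded there.
expected = "be8f064d453a57ea7273970350761d964c9807f1c078006d860d03a3b99aecd4"
actual = sha256_of(Path("last-checkpoint/model.safetensors"))
print("ok" if actual == expected else f"hash mismatch: {actual}")
```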
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.661979797979798,
+  "epoch": 0.6630141414141414,
   "eval_steps": 16,
-  "global_step": 10240,
+  "global_step": 10256,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -77447,6 +77447,127 @@
       "eval_samples_per_second": 11.726,
       "eval_steps_per_second": 1.466,
       "step": 10240
+    },
+    {
+      "epoch": 0.6620444444444444,
+      "grad_norm": 0.049845460802316666,
+      "learning_rate": 0.00017849373338296403,
+      "loss": 0.0913,
+      "step": 10241
+    },
+    {
+      "epoch": 0.6621090909090909,
+      "grad_norm": 0.0473443903028965,
+      "learning_rate": 0.00017848949616165787,
+      "loss": 0.0742,
+      "step": 10242
+    },
+    {
+      "epoch": 0.6621737373737374,
+      "grad_norm": 0.05419163033366203,
+      "learning_rate": 0.0001784852585732823,
+      "loss": 0.0863,
+      "step": 10243
+    },
+    {
+      "epoch": 0.6622383838383838,
+      "grad_norm": 0.06253422796726227,
+      "learning_rate": 0.00017848102061785709,
+      "loss": 0.1065,
+      "step": 10244
+    },
+    {
+      "epoch": 0.6623030303030303,
+      "grad_norm": 0.057101961225271225,
+      "learning_rate": 0.0001784767822954021,
+      "loss": 0.1016,
+      "step": 10245
+    },
+    {
+      "epoch": 0.6623676767676767,
+      "grad_norm": 0.06272678822278976,
+      "learning_rate": 0.00017847254360593717,
+      "loss": 0.0794,
+      "step": 10246
+    },
+    {
+      "epoch": 0.6624323232323233,
+      "grad_norm": 0.051759131252765656,
+      "learning_rate": 0.00017846830454948208,
+      "loss": 0.0738,
+      "step": 10247
+    },
+    {
+      "epoch": 0.6624969696969697,
+      "grad_norm": 0.05439075082540512,
+      "learning_rate": 0.00017846406512605668,
+      "loss": 0.0944,
+      "step": 10248
+    },
+    {
+      "epoch": 0.6625616161616161,
+      "grad_norm": 0.05330037698149681,
+      "learning_rate": 0.00017845982533568075,
+      "loss": 0.088,
+      "step": 10249
+    },
+    {
+      "epoch": 0.6626262626262627,
+      "grad_norm": 0.04516725614666939,
+      "learning_rate": 0.0001784555851783742,
+      "loss": 0.0759,
+      "step": 10250
+    },
+    {
+      "epoch": 0.6626909090909091,
+      "grad_norm": 0.054199665784835815,
+      "learning_rate": 0.0001784513446541568,
+      "loss": 0.0869,
+      "step": 10251
+    },
+    {
+      "epoch": 0.6627555555555555,
+      "grad_norm": 0.05711120367050171,
+      "learning_rate": 0.0001784471037630484,
+      "loss": 0.0828,
+      "step": 10252
+    },
+    {
+      "epoch": 0.662820202020202,
+      "grad_norm": 0.049388039857149124,
+      "learning_rate": 0.00017844286250506884,
+      "loss": 0.0858,
+      "step": 10253
+    },
+    {
+      "epoch": 0.6628848484848485,
+      "grad_norm": 0.04961549863219261,
+      "learning_rate": 0.0001784386208802379,
+      "loss": 0.0852,
+      "step": 10254
+    },
+    {
+      "epoch": 0.662949494949495,
+      "grad_norm": 0.05711861327290535,
+      "learning_rate": 0.0001784343788885755,
+      "loss": 0.0849,
+      "step": 10255
+    },
+    {
+      "epoch": 0.6630141414141414,
+      "grad_norm": 0.06972243636846542,
+      "learning_rate": 0.00017843013653010144,
+      "loss": 0.1039,
+      "step": 10256
+    },
+    {
+      "epoch": 0.6630141414141414,
+      "eval_bleu": 20.086468326664342,
+      "eval_loss": 0.08900181949138641,
+      "eval_runtime": 2.8036,
+      "eval_samples_per_second": 11.414,
+      "eval_steps_per_second": 1.427,
+      "step": 10256
     }
   ],
   "logging_steps": 1,
@@ -77466,7 +77587,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.995433215787008e+17,
+  "total_flos": 1.9985510801866752e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null