diaenra committed
Commit 6684c08 · verified · 1 Parent(s): 43e8e87

Training in progress, step 6453, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8928ff8f51bd78c59585c868d20425ba02a6370277c420eeda6f4e8d5eb17a49
+ oid sha256:9424ead24c1f9928d93dcca08f08662e27a0300efc062a3a829f95c3b8e226c6
  size 377528296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:41b780706cff8eec37e347c899b7f07bacb27079e383c3f0a721314d98f29cdf
+ oid sha256:d27d2e7252f6cdeeeabe5085916f4a1a350decae7f63cd39ae04981d36e05dc3
  size 755217530
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:da85130f73259deb4a2c080bf5c86f12b71247fcdbde96da98ff72288b1f15ce
+ oid sha256:20d715ec9ae5a99a51d0f413b64f52f539737bb65299888f322d1b46910817b7
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f98d23d86b9304de1f65ed18149aa9c1b45c77f48deb0afa9350f43817a4aabb
+ oid sha256:0228103870c5c91f9e7c1c49686736ceb20668b7b5baf93d7127be66bdf65f06
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.9372549019607843,
+ "epoch": 0.9733031674208145,
  "eval_steps": 500,
- "global_step": 6214,
+ "global_step": 6453,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -43505,6 +43505,1679 @@
  "learning_rate": 9.980445299898722e-07,
  "loss": 1.305,
  "step": 6214
43508
+ },
43509
+ {
43510
+ "epoch": 0.9374057315233786,
43511
+ "grad_norm": 1.9081813097000122,
43512
+ "learning_rate": 9.932679372475883e-07,
43513
+ "loss": 0.9995,
43514
+ "step": 6215
43515
+ },
43516
+ {
43517
+ "epoch": 0.9375565610859729,
43518
+ "grad_norm": 1.9167311191558838,
43519
+ "learning_rate": 9.885026875362746e-07,
43520
+ "loss": 0.9232,
43521
+ "step": 6216
43522
+ },
43523
+ {
43524
+ "epoch": 0.9377073906485671,
43525
+ "grad_norm": 1.6560578346252441,
43526
+ "learning_rate": 9.83748781958882e-07,
43527
+ "loss": 0.9508,
43528
+ "step": 6217
43529
+ },
43530
+ {
43531
+ "epoch": 0.9378582202111614,
43532
+ "grad_norm": 1.9292479753494263,
43533
+ "learning_rate": 9.790062216157469e-07,
43534
+ "loss": 1.1442,
43535
+ "step": 6218
43536
+ },
43537
+ {
43538
+ "epoch": 0.9380090497737557,
43539
+ "grad_norm": 2.204526901245117,
43540
+ "learning_rate": 9.742750076045749e-07,
43541
+ "loss": 1.1502,
43542
+ "step": 6219
43543
+ },
43544
+ {
43545
+ "epoch": 0.9381598793363499,
43546
+ "grad_norm": 2.1179401874542236,
43547
+ "learning_rate": 9.695551410204506e-07,
43548
+ "loss": 1.2448,
43549
+ "step": 6220
43550
+ },
43551
+ {
43552
+ "epoch": 0.9383107088989442,
43553
+ "grad_norm": 2.0917677879333496,
43554
+ "learning_rate": 9.648466229558174e-07,
43555
+ "loss": 1.2901,
43556
+ "step": 6221
43557
+ },
43558
+ {
43559
+ "epoch": 0.9384615384615385,
43560
+ "grad_norm": 1.9553565979003906,
43561
+ "learning_rate": 9.601494545005085e-07,
43562
+ "loss": 0.9956,
43563
+ "step": 6222
43564
+ },
43565
+ {
43566
+ "epoch": 0.9386123680241327,
43567
+ "grad_norm": 2.0436511039733887,
43568
+ "learning_rate": 9.554636367417269e-07,
43569
+ "loss": 0.9519,
43570
+ "step": 6223
43571
+ },
43572
+ {
43573
+ "epoch": 0.938763197586727,
43574
+ "grad_norm": 1.8288614749908447,
43575
+ "learning_rate": 9.507891707640437e-07,
43576
+ "loss": 1.0703,
43577
+ "step": 6224
43578
+ },
43579
+ {
43580
+ "epoch": 0.9389140271493213,
43581
+ "grad_norm": 1.8449335098266602,
43582
+ "learning_rate": 9.461260576494046e-07,
43583
+ "loss": 0.983,
43584
+ "step": 6225
43585
+ },
43586
+ {
43587
+ "epoch": 0.9390648567119155,
43588
+ "grad_norm": 1.7894610166549683,
43589
+ "learning_rate": 9.414742984771241e-07,
43590
+ "loss": 0.8819,
43591
+ "step": 6226
43592
+ },
43593
+ {
43594
+ "epoch": 0.9392156862745098,
43595
+ "grad_norm": 2.2926836013793945,
43596
+ "learning_rate": 9.368338943238964e-07,
43597
+ "loss": 1.1588,
43598
+ "step": 6227
43599
+ },
43600
+ {
43601
+ "epoch": 0.9393665158371041,
43602
+ "grad_norm": 2.1777687072753906,
43603
+ "learning_rate": 9.322048462637789e-07,
43604
+ "loss": 1.3449,
43605
+ "step": 6228
43606
+ },
43607
+ {
43608
+ "epoch": 0.9395173453996983,
43609
+ "grad_norm": 1.7373837232589722,
43610
+ "learning_rate": 9.275871553682092e-07,
43611
+ "loss": 0.938,
43612
+ "step": 6229
43613
+ },
43614
+ {
43615
+ "epoch": 0.9396681749622926,
43616
+ "grad_norm": 1.9276561737060547,
43617
+ "learning_rate": 9.229808227059878e-07,
43618
+ "loss": 0.9975,
43619
+ "step": 6230
43620
+ },
43621
+ {
43622
+ "epoch": 0.9398190045248869,
43623
+ "grad_norm": 1.7900891304016113,
43624
+ "learning_rate": 9.183858493432895e-07,
43625
+ "loss": 0.8548,
43626
+ "step": 6231
43627
+ },
43628
+ {
43629
+ "epoch": 0.9399698340874811,
43630
+ "grad_norm": 2.07910418510437,
43631
+ "learning_rate": 9.138022363436638e-07,
43632
+ "loss": 1.1805,
43633
+ "step": 6232
43634
+ },
43635
+ {
43636
+ "epoch": 0.9401206636500754,
43637
+ "grad_norm": 1.7643636465072632,
43638
+ "learning_rate": 9.09229984768023e-07,
43639
+ "loss": 0.991,
43640
+ "step": 6233
43641
+ },
43642
+ {
43643
+ "epoch": 0.9402714932126697,
43644
+ "grad_norm": 1.8613131046295166,
43645
+ "learning_rate": 9.046690956746595e-07,
43646
+ "loss": 1.0485,
43647
+ "step": 6234
43648
+ },
43649
+ {
43650
+ "epoch": 0.9404223227752639,
43651
+ "grad_norm": 2.2653021812438965,
43652
+ "learning_rate": 9.001195701192289e-07,
43653
+ "loss": 1.2899,
43654
+ "step": 6235
43655
+ },
43656
+ {
43657
+ "epoch": 0.9405731523378582,
43658
+ "grad_norm": 2.3277347087860107,
43659
+ "learning_rate": 8.955814091547609e-07,
43660
+ "loss": 1.4375,
43661
+ "step": 6236
43662
+ },
43663
+ {
43664
+ "epoch": 0.9407239819004525,
43665
+ "grad_norm": 2.1147515773773193,
43666
+ "learning_rate": 8.910546138316433e-07,
43667
+ "loss": 1.1043,
43668
+ "step": 6237
43669
+ },
43670
+ {
43671
+ "epoch": 0.9408748114630467,
43672
+ "grad_norm": 1.4688125848770142,
43673
+ "learning_rate": 8.865391851976491e-07,
43674
+ "loss": 0.6265,
43675
+ "step": 6238
43676
+ },
43677
+ {
43678
+ "epoch": 0.941025641025641,
43679
+ "grad_norm": 1.7932592630386353,
43680
+ "learning_rate": 8.820351242979141e-07,
43681
+ "loss": 0.8438,
43682
+ "step": 6239
43683
+ },
43684
+ {
43685
+ "epoch": 0.9411764705882353,
43686
+ "grad_norm": 2.096954107284546,
43687
+ "learning_rate": 8.775424321749382e-07,
43688
+ "loss": 1.0978,
43689
+ "step": 6240
43690
+ },
43691
+ {
43692
+ "epoch": 0.9413273001508295,
43693
+ "grad_norm": 2.0427722930908203,
43694
+ "learning_rate": 8.730611098685948e-07,
43695
+ "loss": 1.0328,
43696
+ "step": 6241
43697
+ },
43698
+ {
43699
+ "epoch": 0.9414781297134238,
43700
+ "grad_norm": 1.7261981964111328,
43701
+ "learning_rate": 8.685911584161266e-07,
43702
+ "loss": 0.7771,
43703
+ "step": 6242
43704
+ },
43705
+ {
43706
+ "epoch": 0.9416289592760181,
43707
+ "grad_norm": 2.103815793991089,
43708
+ "learning_rate": 8.641325788521393e-07,
43709
+ "loss": 0.998,
43710
+ "step": 6243
43711
+ },
43712
+ {
43713
+ "epoch": 0.9417797888386124,
43714
+ "grad_norm": 2.4655301570892334,
43715
+ "learning_rate": 8.596853722086074e-07,
43716
+ "loss": 1.237,
43717
+ "step": 6244
43718
+ },
43719
+ {
43720
+ "epoch": 0.9419306184012066,
43721
+ "grad_norm": 2.7287237644195557,
43722
+ "learning_rate": 8.552495395148852e-07,
43723
+ "loss": 1.4164,
43724
+ "step": 6245
43725
+ },
43726
+ {
43727
+ "epoch": 0.9420814479638009,
43728
+ "grad_norm": 1.7122939825057983,
43729
+ "learning_rate": 8.508250817976737e-07,
43730
+ "loss": 0.9301,
43731
+ "step": 6246
43732
+ },
43733
+ {
43734
+ "epoch": 0.9422322775263952,
43735
+ "grad_norm": 2.004281997680664,
43736
+ "learning_rate": 8.464120000810538e-07,
43737
+ "loss": 1.1161,
43738
+ "step": 6247
43739
+ },
43740
+ {
43741
+ "epoch": 0.9423831070889894,
43742
+ "grad_norm": 1.7664074897766113,
43743
+ "learning_rate": 8.420102953864806e-07,
43744
+ "loss": 0.8845,
43745
+ "step": 6248
43746
+ },
43747
+ {
43748
+ "epoch": 0.9425339366515837,
43749
+ "grad_norm": 1.9992055892944336,
43750
+ "learning_rate": 8.376199687327558e-07,
43751
+ "loss": 0.8977,
43752
+ "step": 6249
43753
+ },
43754
+ {
43755
+ "epoch": 0.942684766214178,
43756
+ "grad_norm": 1.8883174657821655,
43757
+ "learning_rate": 8.332410211360609e-07,
43758
+ "loss": 0.8384,
43759
+ "step": 6250
43760
+ },
43761
+ {
43762
+ "epoch": 0.9428355957767722,
43763
+ "grad_norm": 1.8478468656539917,
43764
+ "learning_rate": 8.288734536099408e-07,
43765
+ "loss": 1.2085,
43766
+ "step": 6251
43767
+ },
43768
+ {
43769
+ "epoch": 0.9429864253393665,
43770
+ "grad_norm": 1.9000264406204224,
43771
+ "learning_rate": 8.245172671653145e-07,
43772
+ "loss": 1.1676,
43773
+ "step": 6252
43774
+ },
43775
+ {
43776
+ "epoch": 0.9431372549019608,
43777
+ "grad_norm": 1.8996500968933105,
43778
+ "learning_rate": 8.201724628104535e-07,
43779
+ "loss": 1.1095,
43780
+ "step": 6253
43781
+ },
43782
+ {
43783
+ "epoch": 0.943288084464555,
43784
+ "grad_norm": 1.6948037147521973,
43785
+ "learning_rate": 8.15839041550992e-07,
43786
+ "loss": 0.9271,
43787
+ "step": 6254
43788
+ },
43789
+ {
43790
+ "epoch": 0.9434389140271493,
43791
+ "grad_norm": 1.878197193145752,
43792
+ "learning_rate": 8.115170043899501e-07,
43793
+ "loss": 0.9737,
43794
+ "step": 6255
43795
+ },
43796
+ {
43797
+ "epoch": 0.9435897435897436,
43798
+ "grad_norm": 2.2108347415924072,
43799
+ "learning_rate": 8.07206352327694e-07,
43800
+ "loss": 1.4355,
43801
+ "step": 6256
43802
+ },
43803
+ {
43804
+ "epoch": 0.9437405731523378,
43805
+ "grad_norm": 1.8024992942810059,
43806
+ "learning_rate": 8.029070863619648e-07,
43807
+ "loss": 1.1714,
43808
+ "step": 6257
43809
+ },
43810
+ {
43811
+ "epoch": 0.9438914027149321,
43812
+ "grad_norm": 1.9239073991775513,
43813
+ "learning_rate": 7.986192074878607e-07,
43814
+ "loss": 0.9482,
43815
+ "step": 6258
43816
+ },
43817
+ {
43818
+ "epoch": 0.9440422322775264,
43819
+ "grad_norm": 1.612963318824768,
43820
+ "learning_rate": 7.943427166978546e-07,
43821
+ "loss": 0.9701,
43822
+ "step": 6259
43823
+ },
43824
+ {
43825
+ "epoch": 0.9441930618401206,
43826
+ "grad_norm": 2.360172748565674,
43827
+ "learning_rate": 7.900776149817713e-07,
43828
+ "loss": 1.3821,
43829
+ "step": 6260
43830
+ },
43831
+ {
43832
+ "epoch": 0.9443438914027149,
43833
+ "grad_norm": 1.8003296852111816,
43834
+ "learning_rate": 7.8582390332681e-07,
43835
+ "loss": 0.9967,
43836
+ "step": 6261
43837
+ },
43838
+ {
43839
+ "epoch": 0.9444947209653092,
43840
+ "grad_norm": 1.9901286363601685,
43841
+ "learning_rate": 7.815815827175221e-07,
43842
+ "loss": 1.2425,
43843
+ "step": 6262
43844
+ },
43845
+ {
43846
+ "epoch": 0.9446455505279034,
43847
+ "grad_norm": 2.1474413871765137,
43848
+ "learning_rate": 7.773506541358333e-07,
43849
+ "loss": 1.2224,
43850
+ "step": 6263
43851
+ },
43852
+ {
43853
+ "epoch": 0.9447963800904977,
43854
+ "grad_norm": 1.9672069549560547,
43855
+ "learning_rate": 7.731311185610268e-07,
43856
+ "loss": 1.1663,
43857
+ "step": 6264
43858
+ },
43859
+ {
43860
+ "epoch": 0.944947209653092,
43861
+ "grad_norm": 1.8437694311141968,
43862
+ "learning_rate": 7.689229769697604e-07,
43863
+ "loss": 0.9525,
43864
+ "step": 6265
43865
+ },
43866
+ {
43867
+ "epoch": 0.9450980392156862,
43868
+ "grad_norm": 1.9961206912994385,
43869
+ "learning_rate": 7.64726230336027e-07,
43870
+ "loss": 1.0964,
43871
+ "step": 6266
43872
+ },
43873
+ {
43874
+ "epoch": 0.9452488687782805,
43875
+ "grad_norm": 1.8415944576263428,
43876
+ "learning_rate": 7.605408796312164e-07,
43877
+ "loss": 1.0519,
43878
+ "step": 6267
43879
+ },
43880
+ {
43881
+ "epoch": 0.9453996983408748,
43882
+ "grad_norm": 1.9274911880493164,
43883
+ "learning_rate": 7.563669258240535e-07,
43884
+ "loss": 1.1,
43885
+ "step": 6268
43886
+ },
43887
+ {
43888
+ "epoch": 0.945550527903469,
43889
+ "grad_norm": 1.9660152196884155,
43890
+ "learning_rate": 7.522043698806436e-07,
43891
+ "loss": 1.1443,
43892
+ "step": 6269
43893
+ },
43894
+ {
43895
+ "epoch": 0.9457013574660633,
43896
+ "grad_norm": 1.7958331108093262,
43897
+ "learning_rate": 7.480532127644435e-07,
43898
+ "loss": 0.9059,
43899
+ "step": 6270
43900
+ },
43901
+ {
43902
+ "epoch": 0.9458521870286576,
43903
+ "grad_norm": 2.053165912628174,
43904
+ "learning_rate": 7.439134554362681e-07,
43905
+ "loss": 1.2559,
43906
+ "step": 6271
43907
+ },
43908
+ {
43909
+ "epoch": 0.9460030165912519,
43910
+ "grad_norm": 1.9057128429412842,
43911
+ "learning_rate": 7.397850988543065e-07,
43912
+ "loss": 1.0629,
43913
+ "step": 6272
43914
+ },
43915
+ {
43916
+ "epoch": 0.9461538461538461,
43917
+ "grad_norm": 1.7405214309692383,
43918
+ "learning_rate": 7.356681439740998e-07,
43919
+ "loss": 0.958,
43920
+ "step": 6273
43921
+ },
43922
+ {
43923
+ "epoch": 0.9463046757164404,
43924
+ "grad_norm": 1.8470877408981323,
43925
+ "learning_rate": 7.315625917485525e-07,
43926
+ "loss": 0.9393,
43927
+ "step": 6274
43928
+ },
43929
+ {
43930
+ "epoch": 0.9464555052790347,
43931
+ "grad_norm": 1.862983226776123,
43932
+ "learning_rate": 7.274684431279378e-07,
43933
+ "loss": 0.8262,
43934
+ "step": 6275
43935
+ },
43936
+ {
43937
+ "epoch": 0.9466063348416289,
43938
+ "grad_norm": 2.055889844894409,
43939
+ "learning_rate": 7.233856990598697e-07,
43940
+ "loss": 1.1398,
43941
+ "step": 6276
43942
+ },
43943
+ {
43944
+ "epoch": 0.9467571644042232,
43945
+ "grad_norm": 1.97694730758667,
43946
+ "learning_rate": 7.193143604893426e-07,
43947
+ "loss": 1.0823,
43948
+ "step": 6277
43949
+ },
43950
+ {
43951
+ "epoch": 0.9469079939668175,
43952
+ "grad_norm": 1.692892074584961,
43953
+ "learning_rate": 7.152544283586971e-07,
43954
+ "loss": 0.7331,
43955
+ "step": 6278
43956
+ },
43957
+ {
43958
+ "epoch": 0.9470588235294117,
43959
+ "grad_norm": 1.8486021757125854,
43960
+ "learning_rate": 7.11205903607648e-07,
43961
+ "loss": 0.8909,
43962
+ "step": 6279
43963
+ },
43964
+ {
43965
+ "epoch": 0.947209653092006,
43966
+ "grad_norm": 2.050459146499634,
43967
+ "learning_rate": 7.071687871732513e-07,
43968
+ "loss": 1.0519,
43969
+ "step": 6280
43970
+ },
43971
+ {
43972
+ "epoch": 0.9473604826546003,
43973
+ "grad_norm": 2.027438163757324,
43974
+ "learning_rate": 7.03143079989943e-07,
43975
+ "loss": 1.1911,
43976
+ "step": 6281
43977
+ },
43978
+ {
43979
+ "epoch": 0.9475113122171945,
43980
+ "grad_norm": 2.2774291038513184,
43981
+ "learning_rate": 6.991287829894999e-07,
43982
+ "loss": 1.0686,
43983
+ "step": 6282
43984
+ },
43985
+ {
43986
+ "epoch": 0.9476621417797888,
43987
+ "grad_norm": 2.0271224975585938,
43988
+ "learning_rate": 6.951258971010677e-07,
43989
+ "loss": 0.9588,
43990
+ "step": 6283
43991
+ },
43992
+ {
43993
+ "epoch": 0.9478129713423831,
43994
+ "grad_norm": 2.0257952213287354,
43995
+ "learning_rate": 6.911344232511496e-07,
43996
+ "loss": 1.0368,
43997
+ "step": 6284
43998
+ },
43999
+ {
44000
+ "epoch": 0.9479638009049773,
44001
+ "grad_norm": 1.8290969133377075,
44002
+ "learning_rate": 6.871543623636012e-07,
44003
+ "loss": 0.9618,
44004
+ "step": 6285
44005
+ },
44006
+ {
44007
+ "epoch": 0.9481146304675716,
44008
+ "grad_norm": 1.9377784729003906,
44009
+ "learning_rate": 6.831857153596521e-07,
44010
+ "loss": 1.1191,
44011
+ "step": 6286
44012
+ },
44013
+ {
44014
+ "epoch": 0.9482654600301659,
44015
+ "grad_norm": 2.043879270553589,
44016
+ "learning_rate": 6.792284831578733e-07,
44017
+ "loss": 1.1668,
44018
+ "step": 6287
44019
+ },
44020
+ {
44021
+ "epoch": 0.9484162895927601,
44022
+ "grad_norm": 1.8056532144546509,
44023
+ "learning_rate": 6.752826666742041e-07,
44024
+ "loss": 0.7392,
44025
+ "step": 6288
44026
+ },
44027
+ {
44028
+ "epoch": 0.9485671191553544,
44029
+ "grad_norm": 2.201486349105835,
44030
+ "learning_rate": 6.713482668219362e-07,
44031
+ "loss": 1.4299,
44032
+ "step": 6289
44033
+ },
44034
+ {
44035
+ "epoch": 0.9487179487179487,
44036
+ "grad_norm": 2.0214931964874268,
44037
+ "learning_rate": 6.674252845117191e-07,
44038
+ "loss": 1.1641,
44039
+ "step": 6290
44040
+ },
44041
+ {
44042
+ "epoch": 0.948868778280543,
44043
+ "grad_norm": 2.255383253097534,
44044
+ "learning_rate": 6.63513720651554e-07,
44045
+ "loss": 1.1026,
44046
+ "step": 6291
44047
+ },
44048
+ {
44049
+ "epoch": 0.9490196078431372,
44050
+ "grad_norm": 2.57189679145813,
44051
+ "learning_rate": 6.596135761468225e-07,
44052
+ "loss": 1.478,
44053
+ "step": 6292
44054
+ },
44055
+ {
44056
+ "epoch": 0.9491704374057315,
44057
+ "grad_norm": 2.4986143112182617,
44058
+ "learning_rate": 6.557248519002302e-07,
44059
+ "loss": 1.6896,
44060
+ "step": 6293
44061
+ },
44062
+ {
44063
+ "epoch": 0.9493212669683257,
44064
+ "grad_norm": 2.127760410308838,
44065
+ "learning_rate": 6.51847548811868e-07,
44066
+ "loss": 1.3076,
44067
+ "step": 6294
44068
+ },
44069
+ {
44070
+ "epoch": 0.94947209653092,
44071
+ "grad_norm": 2.108933925628662,
44072
+ "learning_rate": 6.479816677791683e-07,
44073
+ "loss": 1.0977,
44074
+ "step": 6295
44075
+ },
44076
+ {
44077
+ "epoch": 0.9496229260935143,
44078
+ "grad_norm": 1.5710909366607666,
44079
+ "learning_rate": 6.441272096969153e-07,
44080
+ "loss": 0.6389,
44081
+ "step": 6296
44082
+ },
44083
+ {
44084
+ "epoch": 0.9497737556561086,
44085
+ "grad_norm": 1.3058656454086304,
44086
+ "learning_rate": 6.402841754572675e-07,
44087
+ "loss": 0.5123,
44088
+ "step": 6297
44089
+ },
44090
+ {
44091
+ "epoch": 0.9499245852187028,
44092
+ "grad_norm": 1.7123138904571533,
44093
+ "learning_rate": 6.364525659497189e-07,
44094
+ "loss": 0.7761,
44095
+ "step": 6298
44096
+ },
44097
+ {
44098
+ "epoch": 0.9500754147812971,
44099
+ "grad_norm": 1.6542720794677734,
44100
+ "learning_rate": 6.326323820611379e-07,
44101
+ "loss": 0.7791,
44102
+ "step": 6299
44103
+ },
44104
+ {
44105
+ "epoch": 0.9502262443438914,
44106
+ "grad_norm": 2.116077184677124,
44107
+ "learning_rate": 6.288236246757284e-07,
44108
+ "loss": 1.0661,
44109
+ "step": 6300
44110
+ },
44111
+ {
44112
+ "epoch": 0.9503770739064856,
44113
+ "grad_norm": 1.7709579467773438,
44114
+ "learning_rate": 6.250262946750685e-07,
44115
+ "loss": 1.0497,
44116
+ "step": 6301
44117
+ },
44118
+ {
44119
+ "epoch": 0.9505279034690799,
44120
+ "grad_norm": 1.7317759990692139,
44121
+ "learning_rate": 6.212403929380772e-07,
44122
+ "loss": 1.007,
44123
+ "step": 6302
44124
+ },
44125
+ {
44126
+ "epoch": 0.9506787330316742,
44127
+ "grad_norm": 1.5356085300445557,
44128
+ "learning_rate": 6.174659203410371e-07,
44129
+ "loss": 0.8161,
44130
+ "step": 6303
44131
+ },
44132
+ {
44133
+ "epoch": 0.9508295625942684,
44134
+ "grad_norm": 1.9997459650039673,
44135
+ "learning_rate": 6.137028777575826e-07,
44136
+ "loss": 1.3262,
44137
+ "step": 6304
44138
+ },
44139
+ {
44140
+ "epoch": 0.9509803921568627,
44141
+ "grad_norm": 1.7967150211334229,
44142
+ "learning_rate": 6.099512660587059e-07,
44143
+ "loss": 1.1945,
44144
+ "step": 6305
44145
+ },
44146
+ {
44147
+ "epoch": 0.951131221719457,
44148
+ "grad_norm": 1.8450052738189697,
44149
+ "learning_rate": 6.062110861127402e-07,
44150
+ "loss": 1.1252,
44151
+ "step": 6306
44152
+ },
44153
+ {
44154
+ "epoch": 0.9512820512820512,
44155
+ "grad_norm": 1.8019013404846191,
44156
+ "learning_rate": 6.024823387853928e-07,
44157
+ "loss": 0.8986,
44158
+ "step": 6307
44159
+ },
44160
+ {
44161
+ "epoch": 0.9514328808446455,
44162
+ "grad_norm": 1.6633667945861816,
44163
+ "learning_rate": 5.987650249397125e-07,
44164
+ "loss": 0.9361,
44165
+ "step": 6308
44166
+ },
44167
+ {
44168
+ "epoch": 0.9515837104072398,
44169
+ "grad_norm": 1.9914895296096802,
44170
+ "learning_rate": 5.950591454360943e-07,
44171
+ "loss": 1.2805,
44172
+ "step": 6309
44173
+ },
44174
+ {
44175
+ "epoch": 0.951734539969834,
44176
+ "grad_norm": 1.905820608139038,
44177
+ "learning_rate": 5.913647011323075e-07,
44178
+ "loss": 1.1174,
44179
+ "step": 6310
44180
+ },
44181
+ {
44182
+ "epoch": 0.9518853695324283,
44183
+ "grad_norm": 1.832431435585022,
44184
+ "learning_rate": 5.876816928834572e-07,
44185
+ "loss": 1.1125,
44186
+ "step": 6311
44187
+ },
44188
+ {
44189
+ "epoch": 0.9520361990950226,
44190
+ "grad_norm": 1.762558102607727,
44191
+ "learning_rate": 5.840101215420057e-07,
44192
+ "loss": 1.0359,
44193
+ "step": 6312
44194
+ },
44195
+ {
44196
+ "epoch": 0.9521870286576168,
44197
+ "grad_norm": 1.7604293823242188,
44198
+ "learning_rate": 5.803499879577734e-07,
44199
+ "loss": 1.0938,
44200
+ "step": 6313
44201
+ },
44202
+ {
44203
+ "epoch": 0.9523378582202111,
44204
+ "grad_norm": 2.041506767272949,
44205
+ "learning_rate": 5.767012929779325e-07,
44206
+ "loss": 1.0922,
44207
+ "step": 6314
44208
+ },
44209
+ {
44210
+ "epoch": 0.9524886877828054,
44211
+ "grad_norm": 1.8552873134613037,
44212
+ "learning_rate": 5.73064037447002e-07,
44213
+ "loss": 0.9393,
44214
+ "step": 6315
44215
+ },
44216
+ {
44217
+ "epoch": 0.9526395173453996,
44218
+ "grad_norm": 1.5460180044174194,
44219
+ "learning_rate": 5.694382222068528e-07,
44220
+ "loss": 0.8098,
44221
+ "step": 6316
44222
+ },
44223
+ {
44224
+ "epoch": 0.9527903469079939,
44225
+ "grad_norm": 1.542466640472412,
44226
+ "learning_rate": 5.658238480967137e-07,
44227
+ "loss": 0.7045,
44228
+ "step": 6317
44229
+ },
44230
+ {
44231
+ "epoch": 0.9529411764705882,
44232
+ "grad_norm": 1.8291473388671875,
44233
+ "learning_rate": 5.622209159531655e-07,
44234
+ "loss": 1.1387,
44235
+ "step": 6318
44236
+ },
44237
+ {
44238
+ "epoch": 0.9530920060331824,
44239
+ "grad_norm": 1.950031042098999,
44240
+ "learning_rate": 5.586294266101355e-07,
44241
+ "loss": 1.0013,
44242
+ "step": 6319
44243
+ },
44244
+ {
44245
+ "epoch": 0.9532428355957768,
44246
+ "grad_norm": 1.5500978231430054,
44247
+ "learning_rate": 5.550493808989032e-07,
44248
+ "loss": 0.873,
44249
+ "step": 6320
44250
+ },
44251
+ {
44252
+ "epoch": 0.9533936651583711,
44253
+ "grad_norm": 2.1455116271972656,
44254
+ "learning_rate": 5.514807796481003e-07,
44255
+ "loss": 1.3643,
44256
+ "step": 6321
44257
+ },
44258
+ {
44259
+ "epoch": 0.9535444947209654,
44260
+ "grad_norm": 1.77354896068573,
44261
+ "learning_rate": 5.47923623683716e-07,
44262
+ "loss": 0.8939,
44263
+ "step": 6322
44264
+ },
44265
+ {
44266
+ "epoch": 0.9536953242835596,
44267
+ "grad_norm": 1.7942193746566772,
44268
+ "learning_rate": 5.443779138290806e-07,
44269
+ "loss": 0.8917,
44270
+ "step": 6323
44271
+ },
44272
+ {
44273
+ "epoch": 0.9538461538461539,
44274
+ "grad_norm": 1.7935692071914673,
44275
+ "learning_rate": 5.408436509048819e-07,
44276
+ "loss": 0.839,
44277
+ "step": 6324
44278
+ },
44279
+ {
44280
+ "epoch": 0.9539969834087482,
44281
+ "grad_norm": 1.6746853590011597,
44282
+ "learning_rate": 5.373208357291493e-07,
44283
+ "loss": 0.7705,
44284
+ "step": 6325
44285
+ },
44286
+ {
44287
+ "epoch": 0.9541478129713424,
44288
+ "grad_norm": 1.8481478691101074,
44289
+ "learning_rate": 5.338094691172801e-07,
44290
+ "loss": 0.925,
44291
+ "step": 6326
44292
+ },
44293
+ {
44294
+ "epoch": 0.9542986425339367,
44295
+ "grad_norm": 1.7488939762115479,
44296
+ "learning_rate": 5.303095518819967e-07,
44297
+ "loss": 0.8289,
44298
+ "step": 6327
44299
+ },
44300
+ {
44301
+ "epoch": 0.954449472096531,
44302
+ "grad_norm": 2.2121334075927734,
44303
+ "learning_rate": 5.268210848333954e-07,
44304
+ "loss": 1.4356,
44305
+ "step": 6328
44306
+ },
44307
+ {
44308
+ "epoch": 0.9546003016591252,
44309
+ "grad_norm": 1.872408151626587,
44310
+ "learning_rate": 5.233440687789082e-07,
44311
+ "loss": 0.9361,
44312
+ "step": 6329
44313
+ },
44314
+ {
44315
+ "epoch": 0.9547511312217195,
44316
+ "grad_norm": 2.0881173610687256,
44317
+ "learning_rate": 5.198785045233245e-07,
44318
+ "loss": 1.451,
44319
+ "step": 6330
44320
+ },
44321
+ {
44322
+ "epoch": 0.9549019607843138,
44323
+ "grad_norm": 1.886342167854309,
44324
+ "learning_rate": 5.164243928687695e-07,
44325
+ "loss": 0.8034,
44326
+ "step": 6331
44327
+ },
44328
+ {
44329
+ "epoch": 0.955052790346908,
44330
+ "grad_norm": 2.0379743576049805,
44331
+ "learning_rate": 5.129817346147369e-07,
44332
+ "loss": 1.2724,
44333
+ "step": 6332
44334
+ },
44335
+ {
44336
+ "epoch": 0.9552036199095023,
44337
+ "grad_norm": 1.8097858428955078,
44338
+ "learning_rate": 5.095505305580561e-07,
44339
+ "loss": 0.9383,
44340
+ "step": 6333
44341
+ },
44342
+ {
44343
+ "epoch": 0.9553544494720966,
44344
+ "grad_norm": 1.8055602312088013,
44345
+ "learning_rate": 5.061307814929028e-07,
44346
+ "loss": 0.9776,
44347
+ "step": 6334
44348
+ },
44349
+ {
44350
+ "epoch": 0.9555052790346908,
44351
+ "grad_norm": 1.750784993171692,
44352
+ "learning_rate": 5.027224882108216e-07,
44353
+ "loss": 0.6974,
44354
+ "step": 6335
44355
+ },
44356
+ {
44357
+ "epoch": 0.9556561085972851,
44358
+ "grad_norm": 1.8835489749908447,
44359
+ "learning_rate": 4.993256515006817e-07,
44360
+ "loss": 0.9747,
44361
+ "step": 6336
44362
+ },
44363
+ {
44364
+ "epoch": 0.9558069381598794,
44365
+ "grad_norm": 2.237809658050537,
44366
+ "learning_rate": 4.959402721487094e-07,
44367
+ "loss": 1.1552,
44368
+ "step": 6337
44369
+ },
44370
+ {
44371
+ "epoch": 0.9559577677224737,
44372
+ "grad_norm": 2.2509021759033203,
44373
+ "learning_rate": 4.925663509384782e-07,
44374
+ "loss": 1.2622,
44375
+ "step": 6338
44376
+ },
44377
+ {
44378
+ "epoch": 0.9561085972850679,
44379
+ "grad_norm": 1.9245855808258057,
44380
+ "learning_rate": 4.892038886509242e-07,
44381
+ "loss": 0.9188,
44382
+ "step": 6339
44383
+ },
44384
+ {
44385
+ "epoch": 0.9562594268476622,
44386
+ "grad_norm": 2.145233392715454,
44387
+ "learning_rate": 4.85852886064303e-07,
44388
+ "loss": 1.0986,
44389
+ "step": 6340
44390
+ },
44391
+ {
44392
+ "epoch": 0.9564102564102565,
44393
+ "grad_norm": 1.804188847541809,
44394
+ "learning_rate": 4.825133439542385e-07,
44395
+ "loss": 0.923,
44396
+ "step": 6341
44397
+ },
44398
+ {
44399
+ "epoch": 0.9565610859728507,
44400
+ "grad_norm": 1.800316572189331,
44401
+ "learning_rate": 4.79185263093701e-07,
44402
+ "loss": 0.9414,
44403
+ "step": 6342
44404
+ },
44405
+ {
44406
+ "epoch": 0.956711915535445,
44407
+ "grad_norm": 2.021705389022827,
44408
+ "learning_rate": 4.758686442529969e-07,
44409
+ "loss": 0.9034,
44410
+ "step": 6343
44411
+ },
44412
+ {
44413
+ "epoch": 0.9568627450980393,
44414
+ "grad_norm": 1.8886138200759888,
44415
+ "learning_rate": 4.725634881997898e-07,
44416
+ "loss": 0.9878,
44417
+ "step": 6344
44418
+ },
44419
+ {
44420
+ "epoch": 0.9570135746606335,
44421
+ "grad_norm": 2.104884386062622,
44422
+ "learning_rate": 4.692697956990899e-07,
44423
+ "loss": 1.074,
44424
+ "step": 6345
44425
+ },
44426
+ {
44427
+ "epoch": 0.9571644042232278,
44428
+ "grad_norm": 2.120182991027832,
44429
+ "learning_rate": 4.6598756751324857e-07,
44430
+ "loss": 0.9175,
44431
+ "step": 6346
44432
+ },
44433
+ {
44434
+ "epoch": 0.9573152337858221,
44435
+ "grad_norm": 1.8088730573654175,
44436
+ "learning_rate": 4.6271680440195806e-07,
44437
+ "loss": 0.8787,
44438
+ "step": 6347
44439
+ },
44440
+ {
44441
+ "epoch": 0.9574660633484163,
44442
+ "grad_norm": 1.7701196670532227,
44443
+ "learning_rate": 4.59457507122274e-07,
44444
+ "loss": 0.7213,
44445
+ "step": 6348
44446
+ },
44447
+ {
44448
+ "epoch": 0.9576168929110106,
44449
+ "grad_norm": 2.178802967071533,
44450
+ "learning_rate": 4.562096764285817e-07,
44451
+ "loss": 0.9971,
44452
+ "step": 6349
44453
+ },
44454
+ {
44455
+ "epoch": 0.9577677224736049,
44456
+ "grad_norm": 1.5018692016601562,
44457
+ "learning_rate": 4.529733130726299e-07,
44458
+ "loss": 0.5859,
44459
+ "step": 6350
44460
+ },
44461
+ {
44462
+ "epoch": 0.9579185520361991,
44463
+ "grad_norm": 1.998622179031372,
44464
+ "learning_rate": 4.4974841780349163e-07,
44465
+ "loss": 1.3461,
44466
+ "step": 6351
44467
+ },
44468
+ {
44469
+ "epoch": 0.9580693815987934,
44470
+ "grad_norm": 1.9802663326263428,
44471
+ "learning_rate": 4.465349913676031e-07,
44472
+ "loss": 1.187,
44473
+ "step": 6352
44474
+ },
44475
+ {
44476
+ "epoch": 0.9582202111613877,
44477
+ "grad_norm": 1.7344826459884644,
44478
+ "learning_rate": 4.4333303450873607e-07,
44479
+ "loss": 0.9543,
44480
+ "step": 6353
44481
+ },
44482
+ {
44483
+ "epoch": 0.9583710407239819,
44484
+ "grad_norm": 1.9702990055084229,
44485
+ "learning_rate": 4.401425479680199e-07,
44486
+ "loss": 1.2531,
44487
+ "step": 6354
44488
+ },
44489
+ {
44490
+ "epoch": 0.9585218702865762,
44491
+ "grad_norm": 2.1435110569000244,
44492
+ "learning_rate": 4.369635324839083e-07,
44493
+ "loss": 1.2078,
44494
+ "step": 6355
44495
+ },
44496
+ {
44497
+ "epoch": 0.9586726998491705,
44498
+ "grad_norm": 1.9066894054412842,
44499
+ "learning_rate": 4.3379598879221825e-07,
44500
+ "loss": 1.3334,
44501
+ "step": 6356
44502
+ },
44503
+ {
44504
+ "epoch": 0.9588235294117647,
44505
+ "grad_norm": 1.7345263957977295,
44506
+ "learning_rate": 4.306399176261022e-07,
44507
+ "loss": 0.9146,
44508
+ "step": 6357
44509
+ },
44510
+ {
44511
+ "epoch": 0.958974358974359,
44512
+ "grad_norm": 1.7452151775360107,
44513
+ "learning_rate": 4.274953197160647e-07,
44514
+ "loss": 1.0763,
44515
+ "step": 6358
44516
+ },
44517
+ {
44518
+ "epoch": 0.9591251885369533,
44519
+ "grad_norm": 1.8526966571807861,
44520
+ "learning_rate": 4.243621957899457e-07,
44521
+ "loss": 1.201,
44522
+ "step": 6359
44523
+ },
44524
+ {
44525
+ "epoch": 0.9592760180995475,
44526
+ "grad_norm": 1.969795823097229,
44527
+ "learning_rate": 4.2124054657293187e-07,
44528
+ "loss": 1.0364,
44529
+ "step": 6360
44530
+ },
44531
+ {
44532
+ "epoch": 0.9594268476621418,
44533
+ "grad_norm": 1.8643343448638916,
44534
+ "learning_rate": 4.1813037278756184e-07,
44535
+ "loss": 0.9283,
44536
+ "step": 6361
44537
+ },
44538
+ {
44539
+ "epoch": 0.9595776772247361,
44540
+ "grad_norm": 1.9766918420791626,
44541
+ "learning_rate": 4.1503167515370976e-07,
44542
+ "loss": 1.1157,
44543
+ "step": 6362
44544
+ },
44545
+ {
44546
+ "epoch": 0.9597285067873303,
44547
+ "grad_norm": 1.9669005870819092,
44548
+ "learning_rate": 4.119444543885964e-07,
44549
+ "loss": 1.1146,
44550
+ "step": 6363
44551
+ },
44552
+ {
44553
+ "epoch": 0.9598793363499246,
44554
+ "grad_norm": 1.802460789680481,
44555
+ "learning_rate": 4.0886871120678903e-07,
44556
+ "loss": 1.0177,
44557
+ "step": 6364
44558
+ },
44559
+ {
44560
+ "epoch": 0.9600301659125189,
44561
+ "grad_norm": 1.8280365467071533,
44562
+ "learning_rate": 4.058044463201849e-07,
44563
+ "loss": 1.0281,
44564
+ "step": 6365
44565
+ },
44566
+ {
44567
+ "epoch": 0.9601809954751132,
44568
+ "grad_norm": 1.9677125215530396,
44569
+ "learning_rate": 4.027516604380388e-07,
44570
+ "loss": 1.0923,
44571
+ "step": 6366
44572
+ },
44573
+ {
44574
+ "epoch": 0.9603318250377074,
44575
+ "grad_norm": 2.0572264194488525,
44576
+ "learning_rate": 3.9971035426695226e-07,
44577
+ "loss": 1.2239,
44578
+ "step": 6367
44579
+ },
44580
+ {
44581
+ "epoch": 0.9604826546003017,
44582
+ "grad_norm": 2.10935115814209,
44583
+ "learning_rate": 3.96680528510851e-07,
44584
+ "loss": 1.2413,
44585
+ "step": 6368
44586
+ },
44587
+ {
44588
+ "epoch": 0.960633484162896,
44589
+ "grad_norm": 1.6900520324707031,
44590
+ "learning_rate": 3.936621838710186e-07,
44591
+ "loss": 0.8827,
44592
+ "step": 6369
44593
+ },
44594
+ {
44595
+ "epoch": 0.9607843137254902,
44596
+ "grad_norm": 2.0149905681610107,
44597
+ "learning_rate": 3.906553210460795e-07,
44598
+ "loss": 0.9325,
44599
+ "step": 6370
44600
+ },
44601
+ {
44602
+ "epoch": 0.9609351432880845,
44603
+ "grad_norm": 1.7919515371322632,
44604
+ "learning_rate": 3.8765994073199366e-07,
44605
+ "loss": 0.8165,
44606
+ "step": 6371
44607
+ },
44608
+ {
44609
+ "epoch": 0.9610859728506788,
44610
+ "grad_norm": 1.882252812385559,
44611
+ "learning_rate": 3.8467604362206753e-07,
44612
+ "loss": 1.0458,
44613
+ "step": 6372
44614
+ },
44615
+ {
44616
+ "epoch": 0.961236802413273,
44617
+ "grad_norm": 1.703015923500061,
44618
+ "learning_rate": 3.8170363040695435e-07,
44619
+ "loss": 0.8059,
44620
+ "step": 6373
44621
+ },
44622
+ {
44623
+ "epoch": 0.9613876319758673,
44624
+ "grad_norm": 1.6983740329742432,
44625
+ "learning_rate": 3.787427017746481e-07,
44626
+ "loss": 0.7853,
44627
+ "step": 6374
44628
+ },
44629
+ {
44630
+ "epoch": 0.9615384615384616,
44631
+ "grad_norm": 1.5398797988891602,
44632
+ "learning_rate": 3.757932584104673e-07,
44633
+ "loss": 0.6919,
44634
+ "step": 6375
44635
+ },
44636
+ {
44637
+ "epoch": 0.9616892911010558,
44638
+ "grad_norm": 1.4278013706207275,
44639
+ "learning_rate": 3.728553009970992e-07,
44640
+ "loss": 0.638,
44641
+ "step": 6376
44642
+ },
44643
+ {
44644
+ "epoch": 0.9618401206636501,
44645
+ "grad_norm": 1.8496487140655518,
44646
+ "learning_rate": 3.6992883021455537e-07,
44647
+ "loss": 0.8303,
44648
+ "step": 6377
44649
+ },
44650
+ {
44651
+ "epoch": 0.9619909502262444,
44652
+ "grad_norm": 2.1084578037261963,
44653
+ "learning_rate": 3.6701384674018845e-07,
44654
+ "loss": 1.1322,
44655
+ "step": 6378
44656
+ },
44657
+ {
44658
+ "epoch": 0.9621417797888386,
44659
+ "grad_norm": 1.3949884176254272,
44660
+ "learning_rate": 3.6411035124870317e-07,
44661
+ "loss": 0.6075,
44662
+ "step": 6379
44663
+ },
44664
+ {
44665
+ "epoch": 0.9622926093514329,
44666
+ "grad_norm": 1.922567367553711,
44667
+ "learning_rate": 3.612183444121342e-07,
44668
+ "loss": 1.0072,
44669
+ "step": 6380
44670
+ },
44671
+ {
44672
+ "epoch": 0.9624434389140272,
44673
+ "grad_norm": 1.9455474615097046,
44674
+ "learning_rate": 3.583378268998683e-07,
44675
+ "loss": 0.8225,
44676
+ "step": 6381
44677
+ },
44678
+ {
44679
+ "epoch": 0.9625942684766214,
44680
+ "grad_norm": 2.082988739013672,
44681
+ "learning_rate": 3.55468799378611e-07,
44682
+ "loss": 1.2573,
44683
+ "step": 6382
44684
+ },
44685
+ {
44686
+ "epoch": 0.9627450980392157,
44687
+ "grad_norm": 1.762777328491211,
44688
+ "learning_rate": 3.5261126251244223e-07,
44689
+ "loss": 0.7789,
44690
+ "step": 6383
44691
+ },
44692
+ {
44693
+ "epoch": 0.96289592760181,
44694
+ "grad_norm": 2.2491986751556396,
44695
+ "learning_rate": 3.4976521696274966e-07,
44696
+ "loss": 1.1503,
44697
+ "step": 6384
44698
+ },
44699
+ {
44700
+ "epoch": 0.9630467571644042,
44701
+ "grad_norm": 2.2879385948181152,
44702
+ "learning_rate": 3.4693066338828405e-07,
44703
+ "loss": 1.1606,
44704
+ "step": 6385
44705
+ },
44706
+ {
44707
+ "epoch": 0.9631975867269985,
44708
+ "grad_norm": 1.879817008972168,
44709
+ "learning_rate": 3.441076024451151e-07,
44710
+ "loss": 0.9548,
44711
+ "step": 6386
44712
+ },
44713
+ {
44714
+ "epoch": 0.9633484162895928,
44715
+ "grad_norm": 1.963283658027649,
44716
+ "learning_rate": 3.4129603478668114e-07,
44717
+ "loss": 1.0147,
44718
+ "step": 6387
44719
+ },
44720
+ {
44721
+ "epoch": 0.963499245852187,
44722
+ "grad_norm": 1.8836108446121216,
44723
+ "learning_rate": 3.384959610637284e-07,
44724
+ "loss": 0.9685,
44725
+ "step": 6388
44726
+ },
44727
+ {
44728
+ "epoch": 0.9636500754147813,
44729
+ "grad_norm": 2.1015186309814453,
44730
+ "learning_rate": 3.357073819243661e-07,
44731
+ "loss": 1.3716,
44732
+ "step": 6389
44733
+ },
44734
+ {
44735
+ "epoch": 0.9638009049773756,
44736
+ "grad_norm": 2.008430004119873,
44737
+ "learning_rate": 3.329302980140392e-07,
44738
+ "loss": 1.1204,
44739
+ "step": 6390
44740
+ },
44741
+ {
44742
+ "epoch": 0.9639517345399699,
44743
+ "grad_norm": 1.9310039281845093,
44744
+ "learning_rate": 3.3016470997551675e-07,
44745
+ "loss": 0.9668,
44746
+ "step": 6391
44747
+ },
44748
+ {
44749
+ "epoch": 0.9641025641025641,
44750
+ "grad_norm": 1.8051788806915283,
44751
+ "learning_rate": 3.274106184489312e-07,
44752
+ "loss": 0.8055,
44753
+ "step": 6392
44754
+ },
44755
+ {
44756
+ "epoch": 0.9642533936651584,
44757
+ "grad_norm": 1.9417747259140015,
44758
+ "learning_rate": 3.246680240717226e-07,
44759
+ "loss": 1.1606,
44760
+ "step": 6393
44761
+ },
44762
+ {
44763
+ "epoch": 0.9644042232277527,
44764
+ "grad_norm": 2.2688350677490234,
44765
+ "learning_rate": 3.219369274787054e-07,
44766
+ "loss": 1.18,
44767
+ "step": 6394
44768
+ },
44769
+ {
44770
+ "epoch": 0.9645550527903469,
44771
+ "grad_norm": 2.1807358264923096,
44772
+ "learning_rate": 3.1921732930200734e-07,
44773
+ "loss": 0.9813,
44774
+ "step": 6395
44775
+ },
44776
+ {
44777
+ "epoch": 0.9647058823529412,
44778
+ "grad_norm": 1.7102917432785034,
44779
+ "learning_rate": 3.1650923017110254e-07,
44780
+ "loss": 0.7327,
44781
+ "step": 6396
44782
+ },
44783
+ {
44784
+ "epoch": 0.9648567119155355,
44785
+ "grad_norm": 1.8029088973999023,
44786
+ "learning_rate": 3.1381263071280643e-07,
44787
+ "loss": 0.8494,
44788
+ "step": 6397
44789
+ },
44790
+ {
44791
+ "epoch": 0.9650075414781297,
44792
+ "grad_norm": 1.7781802415847778,
44793
+ "learning_rate": 3.1112753155126963e-07,
44794
+ "loss": 0.9113,
44795
+ "step": 6398
44796
+ },
44797
+ {
44798
+ "epoch": 0.965158371040724,
44799
+ "grad_norm": 1.8984512090682983,
44800
+ "learning_rate": 3.084539333079839e-07,
44801
+ "loss": 0.8357,
44802
+ "step": 6399
44803
+ },
44804
+ {
44805
+ "epoch": 0.9653092006033183,
44806
+ "grad_norm": 1.5885837078094482,
44807
+ "learning_rate": 3.0579183660177093e-07,
44808
+ "loss": 0.6474,
44809
+ "step": 6400
44810
+ },
44811
+ {
44812
+ "epoch": 0.9654600301659125,
44813
+ "grad_norm": 1.7469663619995117,
44814
+ "learning_rate": 3.0314124204880446e-07,
44815
+ "loss": 1.1415,
44816
+ "step": 6401
44817
+ },
44818
+ {
44819
+ "epoch": 0.9656108597285068,
44820
+ "grad_norm": 1.5996235609054565,
44821
+ "learning_rate": 3.0050215026257713e-07,
44822
+ "loss": 0.9154,
44823
+ "step": 6402
44824
+ },
44825
+ {
44826
+ "epoch": 0.9657616892911011,
44827
+ "grad_norm": 1.8189271688461304,
44828
+ "learning_rate": 2.978745618539336e-07,
44829
+ "loss": 1.0192,
44830
+ "step": 6403
44831
+ },
44832
+ {
44833
+ "epoch": 0.9659125188536953,
44834
+ "grad_norm": 1.7454121112823486,
44835
+ "learning_rate": 2.9525847743105405e-07,
44836
+ "loss": 1.0345,
44837
+ "step": 6404
44838
+ },
44839
+ {
44840
+ "epoch": 0.9660633484162896,
44841
+ "grad_norm": 1.6599011421203613,
44842
+ "learning_rate": 2.926538975994486e-07,
44843
+ "loss": 1.0556,
44844
+ "step": 6405
44845
+ },
44846
+ {
44847
+ "epoch": 0.9662141779788839,
44848
+ "grad_norm": 1.872502088546753,
44849
+ "learning_rate": 2.9006082296197946e-07,
44850
+ "loss": 1.0526,
44851
+ "step": 6406
44852
+ },
44853
+ {
44854
+ "epoch": 0.9663650075414781,
44855
+ "grad_norm": 1.8260844945907593,
44856
+ "learning_rate": 2.874792541188276e-07,
44857
+ "loss": 1.0962,
44858
+ "step": 6407
44859
+ },
44860
+ {
44861
+ "epoch": 0.9665158371040724,
44862
+ "grad_norm": 2.0990805625915527,
44863
+ "learning_rate": 2.849091916675206e-07,
44864
+ "loss": 1.28,
44865
+ "step": 6408
44866
+ },
44867
+ {
44868
+ "epoch": 0.9666666666666667,
44869
+ "grad_norm": 1.6568230390548706,
44870
+ "learning_rate": 2.8235063620292714e-07,
44871
+ "loss": 0.9448,
44872
+ "step": 6409
44873
+ },
44874
+ {
44875
+ "epoch": 0.9668174962292609,
44876
+ "grad_norm": 1.853630781173706,
44877
+ "learning_rate": 2.798035883172401e-07,
44878
+ "loss": 0.9706,
44879
+ "step": 6410
44880
+ },
44881
+ {
44882
+ "epoch": 0.9669683257918552,
44883
+ "grad_norm": 1.8414771556854248,
44884
+ "learning_rate": 2.7726804859999897e-07,
44885
+ "loss": 0.9215,
44886
+ "step": 6411
44887
+ },
44888
+ {
44889
+ "epoch": 0.9671191553544495,
44890
+ "grad_norm": 1.9662418365478516,
44891
+ "learning_rate": 2.7474401763807886e-07,
44892
+ "loss": 0.86,
44893
+ "step": 6412
44894
+ },
44895
+ {
44896
+ "epoch": 0.9672699849170437,
44897
+ "grad_norm": 1.7639135122299194,
44898
+ "learning_rate": 2.722314960156791e-07,
44899
+ "loss": 0.8866,
44900
+ "step": 6413
44901
+ },
44902
+ {
44903
+ "epoch": 0.967420814479638,
44904
+ "grad_norm": 1.749525785446167,
44905
+ "learning_rate": 2.6973048431435667e-07,
44906
+ "loss": 1.022,
44907
+ "step": 6414
44908
+ },
44909
+ {
44910
+ "epoch": 0.9675716440422323,
44911
+ "grad_norm": 1.6495894193649292,
44912
+ "learning_rate": 2.6724098311298183e-07,
44913
+ "loss": 0.7061,
44914
+ "step": 6415
44915
+ },
44916
+ {
44917
+ "epoch": 0.9677224736048265,
44918
+ "grad_norm": 1.8045361042022705,
44919
+ "learning_rate": 2.6476299298777705e-07,
44920
+ "loss": 1.0393,
44921
+ "step": 6416
44922
+ },
44923
+ {
44924
+ "epoch": 0.9678733031674208,
44925
+ "grad_norm": 1.9385876655578613,
44926
+ "learning_rate": 2.6229651451229465e-07,
44927
+ "loss": 1.0813,
44928
+ "step": 6417
44929
+ },
44930
+ {
44931
+ "epoch": 0.9680241327300151,
44932
+ "grad_norm": 2.213212490081787,
44933
+ "learning_rate": 2.5984154825742235e-07,
44934
+ "loss": 1.3767,
44935
+ "step": 6418
44936
+ },
44937
+ {
44938
+ "epoch": 0.9681749622926094,
44939
+ "grad_norm": 2.0189006328582764,
44940
+ "learning_rate": 2.573980947913779e-07,
44941
+ "loss": 1.4085,
44942
+ "step": 6419
44943
+ },
44944
+ {
44945
+ "epoch": 0.9683257918552036,
44946
+ "grad_norm": 2.1515588760375977,
44947
+ "learning_rate": 2.549661546797255e-07,
44948
+ "loss": 1.3382,
44949
+ "step": 6420
44950
+ },
44951
+ {
44952
+ "epoch": 0.9684766214177979,
44953
+ "grad_norm": 2.0453109741210938,
44954
+ "learning_rate": 2.5254572848535383e-07,
44955
+ "loss": 1.1791,
44956
+ "step": 6421
44957
+ },
44958
+ {
44959
+ "epoch": 0.9686274509803922,
44960
+ "grad_norm": 2.314772844314575,
44961
+ "learning_rate": 2.501368167684981e-07,
44962
+ "loss": 1.2573,
44963
+ "step": 6422
44964
+ },
44965
+ {
44966
+ "epoch": 0.9687782805429864,
44967
+ "grad_norm": 1.948198914527893,
44968
+ "learning_rate": 2.477394200867178e-07,
44969
+ "loss": 1.0669,
44970
+ "step": 6423
44971
+ },
44972
+ {
44973
+ "epoch": 0.9689291101055807,
44974
+ "grad_norm": 2.3818795680999756,
44975
+ "learning_rate": 2.45353538994908e-07,
44976
+ "loss": 1.5041,
44977
+ "step": 6424
44978
+ },
44979
+ {
44980
+ "epoch": 0.969079939668175,
44981
+ "grad_norm": 1.7617933750152588,
44982
+ "learning_rate": 2.429791740453102e-07,
44983
+ "loss": 0.9809,
44984
+ "step": 6425
44985
+ },
44986
+ {
44987
+ "epoch": 0.9692307692307692,
44988
+ "grad_norm": 1.5112937688827515,
44989
+ "learning_rate": 2.406163257874794e-07,
44990
+ "loss": 0.6922,
44991
+ "step": 6426
44992
+ },
44993
+ {
44994
+ "epoch": 0.9693815987933635,
44995
+ "grad_norm": 1.7321679592132568,
44996
+ "learning_rate": 2.3826499476832797e-07,
44997
+ "loss": 0.8135,
44998
+ "step": 6427
44999
+ },
45000
+ {
45001
+ "epoch": 0.9695324283559578,
45002
+ "grad_norm": 2.0587239265441895,
45003
+ "learning_rate": 2.3592518153208732e-07,
45004
+ "loss": 0.9306,
45005
+ "step": 6428
45006
+ },
45007
+ {
45008
+ "epoch": 0.969683257918552,
45009
+ "grad_norm": 2.0357539653778076,
45010
+ "learning_rate": 2.3359688662032975e-07,
45011
+ "loss": 1.1696,
45012
+ "step": 6429
45013
+ },
45014
+ {
45015
+ "epoch": 0.9698340874811463,
45016
+ "grad_norm": 1.9683305025100708,
45017
+ "learning_rate": 2.3128011057195753e-07,
45018
+ "loss": 1.0745,
45019
+ "step": 6430
45020
+ },
45021
+ {
45022
+ "epoch": 0.9699849170437406,
45023
+ "grad_norm": 2.4005391597747803,
45024
+ "learning_rate": 2.289748539232084e-07,
45025
+ "loss": 1.2111,
45026
+ "step": 6431
45027
+ },
45028
+ {
45029
+ "epoch": 0.9701357466063348,
45030
+ "grad_norm": 1.8384374380111694,
45031
+ "learning_rate": 2.2668111720764996e-07,
45032
+ "loss": 0.9713,
45033
+ "step": 6432
45034
+ },
45035
+ {
45036
+ "epoch": 0.9702865761689291,
45037
+ "grad_norm": 2.119652271270752,
45038
+ "learning_rate": 2.2439890095619641e-07,
45039
+ "loss": 1.1779,
45040
+ "step": 6433
45041
+ },
45042
+ {
45043
+ "epoch": 0.9704374057315234,
45044
+ "grad_norm": 2.0867671966552734,
45045
+ "learning_rate": 2.2212820569707526e-07,
45046
+ "loss": 1.1961,
45047
+ "step": 6434
45048
+ },
45049
+ {
45050
+ "epoch": 0.9705882352941176,
45051
+ "grad_norm": 1.7994745969772339,
45052
+ "learning_rate": 2.1986903195586607e-07,
45053
+ "loss": 0.9142,
45054
+ "step": 6435
45055
+ },
45056
+ {
45057
+ "epoch": 0.9707390648567119,
45058
+ "grad_norm": 2.197286367416382,
45059
+ "learning_rate": 2.1762138025547275e-07,
45060
+ "loss": 1.2425,
45061
+ "step": 6436
45062
+ },
45063
+ {
45064
+ "epoch": 0.9708898944193062,
45065
+ "grad_norm": 1.8814419507980347,
45066
+ "learning_rate": 2.1538525111613473e-07,
45067
+ "loss": 1.0905,
45068
+ "step": 6437
45069
+ },
45070
+ {
45071
+ "epoch": 0.9710407239819004,
45072
+ "grad_norm": 2.010801315307617,
45073
+ "learning_rate": 2.1316064505542133e-07,
45074
+ "loss": 1.0614,
45075
+ "step": 6438
45076
+ },
45077
+ {
45078
+ "epoch": 0.9711915535444947,
45079
+ "grad_norm": 1.5283746719360352,
45080
+ "learning_rate": 2.1094756258823735e-07,
45081
+ "loss": 0.6536,
45082
+ "step": 6439
45083
+ },
45084
+ {
45085
+ "epoch": 0.971342383107089,
45086
+ "grad_norm": 2.065852165222168,
45087
+ "learning_rate": 2.08746004226823e-07,
45088
+ "loss": 1.2922,
45089
+ "step": 6440
45090
+ },
45091
+ {
45092
+ "epoch": 0.9714932126696832,
45093
+ "grad_norm": 1.914737582206726,
45094
+ "learning_rate": 2.0655597048074293e-07,
45095
+ "loss": 1.1637,
45096
+ "step": 6441
45097
+ },
45098
+ {
45099
+ "epoch": 0.9716440422322775,
45100
+ "grad_norm": 2.0344982147216797,
45101
+ "learning_rate": 2.043774618568972e-07,
45102
+ "loss": 1.0028,
45103
+ "step": 6442
45104
+ },
45105
+ {
45106
+ "epoch": 0.9717948717948718,
45107
+ "grad_norm": 2.1902503967285156,
45108
+ "learning_rate": 2.0221047885953248e-07,
45109
+ "loss": 1.1506,
45110
+ "step": 6443
45111
+ },
45112
+ {
45113
+ "epoch": 0.971945701357466,
45114
+ "grad_norm": 2.2948451042175293,
45115
+ "learning_rate": 2.0005502199020309e-07,
45116
+ "loss": 1.2524,
45117
+ "step": 6444
45118
+ },
45119
+ {
45120
+ "epoch": 0.9720965309200603,
45121
+ "grad_norm": 2.206702709197998,
45122
+ "learning_rate": 1.9791109174780996e-07,
45123
+ "loss": 1.012,
45124
+ "step": 6445
45125
+ },
45126
+ {
45127
+ "epoch": 0.9722473604826546,
45128
+ "grad_norm": 1.6949716806411743,
45129
+ "learning_rate": 1.957786886285895e-07,
45130
+ "loss": 0.734,
45131
+ "step": 6446
45132
+ },
45133
+ {
45134
+ "epoch": 0.9723981900452489,
45135
+ "grad_norm": 1.4771981239318848,
45136
+ "learning_rate": 1.9365781312610244e-07,
45137
+ "loss": 0.6859,
45138
+ "step": 6447
45139
+ },
45140
+ {
45141
+ "epoch": 0.9725490196078431,
45142
+ "grad_norm": 1.7075660228729248,
45143
+ "learning_rate": 1.915484657312505e-07,
45144
+ "loss": 0.7901,
45145
+ "step": 6448
45146
+ },
45147
+ {
45148
+ "epoch": 0.9726998491704374,
45149
+ "grad_norm": 2.1574273109436035,
45150
+ "learning_rate": 1.8945064693224322e-07,
45151
+ "loss": 1.1075,
45152
+ "step": 6449
45153
+ },
45154
+ {
45155
+ "epoch": 0.9728506787330317,
45156
+ "grad_norm": 1.7101869583129883,
45157
+ "learning_rate": 1.873643572146533e-07,
45158
+ "loss": 0.8212,
45159
+ "step": 6450
45160
+ },
45161
+ {
45162
+ "epoch": 0.9730015082956259,
45163
+ "grad_norm": 2.248549461364746,
45164
+ "learning_rate": 1.8528959706136662e-07,
45165
+ "loss": 1.9165,
45166
+ "step": 6451
45167
+ },
45168
+ {
45169
+ "epoch": 0.9731523378582202,
45170
+ "grad_norm": 2.1564855575561523,
45171
+ "learning_rate": 1.8322636695260465e-07,
45172
+ "loss": 1.3283,
45173
+ "step": 6452
45174
+ },
45175
+ {
45176
+ "epoch": 0.9733031674208145,
45177
+ "grad_norm": 1.9545012712478638,
45178
+ "learning_rate": 1.811746673659187e-07,
45179
+ "loss": 1.0547,
45180
+ "step": 6453
  }
  ],
  "logging_steps": 1,
@@ -43524,7 +45197,7 @@
  "attributes": {}
  }
  },
- "total_flos": 6.908284322080358e+18,
+ "total_flos": 7.172916759861658e+18,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null