diaenra commited on
Commit
94fe907
·
verified ·
1 Parent(s): f77b21b

Training in progress, step 5945, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ebef31135b45565df6ed5f3402d41965c0085e3579054017540bd9502336d5bc
3
  size 516802328
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70c3b19bc4eb8be76e3f5a614ad9651dac58cc5c1cf201702fc3087309d23ca7
3
  size 516802328
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9410d6faf0b0b2a20a9f5d7e528ee5c22798ebdce85f3ee5ebc57c74c84b3d40
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3662bc0e65237626b783f1e110f32d0f58b50371353139f3380aa21907d63486
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f2d74ffd31206d3e81b71ea7d00d130021e2a8ddf2f551c8cf2a0016186f7977
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee9c30235a00d4e956792d86eabe739f21989b7ae3dfec94f60c7acfc17fd683
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9647632663358843,
5
  "eval_steps": 500,
6
- "global_step": 5736,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -40159,6 +40159,1469 @@
40159
  "learning_rate": 3.151421924994513e-07,
40160
  "loss": 0.0,
40161
  "step": 5736
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40162
  }
40163
  ],
40164
  "logging_steps": 1,
@@ -40173,12 +41636,12 @@
40173
  "should_evaluate": false,
40174
  "should_log": false,
40175
  "should_save": true,
40176
- "should_training_stop": false
40177
  },
40178
  "attributes": {}
40179
  }
40180
  },
40181
- "total_flos": 1.0797842128699392e+17,
40182
  "train_batch_size": 8,
40183
  "trial_name": null,
40184
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9999159027836179,
5
  "eval_steps": 500,
6
+ "global_step": 5945,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
40159
  "learning_rate": 3.151421924994513e-07,
40160
  "loss": 0.0,
40161
  "step": 5736
40162
+ },
40163
+ {
40164
+ "epoch": 0.9649314607686486,
40165
+ "grad_norm": NaN,
40166
+ "learning_rate": 3.121368265136704e-07,
40167
+ "loss": 0.0,
40168
+ "step": 5737
40169
+ },
40170
+ {
40171
+ "epoch": 0.9650996552014128,
40172
+ "grad_norm": NaN,
40173
+ "learning_rate": 3.091458147950255e-07,
40174
+ "loss": 0.0,
40175
+ "step": 5738
40176
+ },
40177
+ {
40178
+ "epoch": 0.9652678496341771,
40179
+ "grad_norm": NaN,
40180
+ "learning_rate": 3.0616915820758095e-07,
40181
+ "loss": 0.0,
40182
+ "step": 5739
40183
+ },
40184
+ {
40185
+ "epoch": 0.9654360440669414,
40186
+ "grad_norm": NaN,
40187
+ "learning_rate": 3.0320685761127123e-07,
40188
+ "loss": 0.0,
40189
+ "step": 5740
40190
+ },
40191
+ {
40192
+ "epoch": 0.9656042384997057,
40193
+ "grad_norm": NaN,
40194
+ "learning_rate": 3.002589138618561e-07,
40195
+ "loss": 0.0,
40196
+ "step": 5741
40197
+ },
40198
+ {
40199
+ "epoch": 0.96577243293247,
40200
+ "grad_norm": NaN,
40201
+ "learning_rate": 2.973253278109767e-07,
40202
+ "loss": 0.0,
40203
+ "step": 5742
40204
+ },
40205
+ {
40206
+ "epoch": 0.9659406273652342,
40207
+ "grad_norm": NaN,
40208
+ "learning_rate": 2.9440610030610494e-07,
40209
+ "loss": 0.0,
40210
+ "step": 5743
40211
+ },
40212
+ {
40213
+ "epoch": 0.9661088217979985,
40214
+ "grad_norm": NaN,
40215
+ "learning_rate": 2.91501232190583e-07,
40216
+ "loss": 0.0,
40217
+ "step": 5744
40218
+ },
40219
+ {
40220
+ "epoch": 0.9662770162307628,
40221
+ "grad_norm": NaN,
40222
+ "learning_rate": 2.8861072430358404e-07,
40223
+ "loss": 0.0,
40224
+ "step": 5745
40225
+ },
40226
+ {
40227
+ "epoch": 0.9664452106635271,
40228
+ "grad_norm": NaN,
40229
+ "learning_rate": 2.8573457748014564e-07,
40230
+ "loss": 0.0,
40231
+ "step": 5746
40232
+ },
40233
+ {
40234
+ "epoch": 0.9666134050962913,
40235
+ "grad_norm": NaN,
40236
+ "learning_rate": 2.8287279255115873e-07,
40237
+ "loss": 0.0,
40238
+ "step": 5747
40239
+ },
40240
+ {
40241
+ "epoch": 0.9667815995290556,
40242
+ "grad_norm": NaN,
40243
+ "learning_rate": 2.800253703433564e-07,
40244
+ "loss": 0.0,
40245
+ "step": 5748
40246
+ },
40247
+ {
40248
+ "epoch": 0.9669497939618199,
40249
+ "grad_norm": NaN,
40250
+ "learning_rate": 2.7719231167933067e-07,
40251
+ "loss": 0.0,
40252
+ "step": 5749
40253
+ },
40254
+ {
40255
+ "epoch": 0.9671179883945842,
40256
+ "grad_norm": NaN,
40257
+ "learning_rate": 2.743736173775213e-07,
40258
+ "loss": 0.0,
40259
+ "step": 5750
40260
+ },
40261
+ {
40262
+ "epoch": 0.9672861828273485,
40263
+ "grad_norm": NaN,
40264
+ "learning_rate": 2.715692882522103e-07,
40265
+ "loss": 0.0,
40266
+ "step": 5751
40267
+ },
40268
+ {
40269
+ "epoch": 0.9674543772601127,
40270
+ "grad_norm": NaN,
40271
+ "learning_rate": 2.687793251135384e-07,
40272
+ "loss": 0.0,
40273
+ "step": 5752
40274
+ },
40275
+ {
40276
+ "epoch": 0.967622571692877,
40277
+ "grad_norm": NaN,
40278
+ "learning_rate": 2.6600372876750544e-07,
40279
+ "loss": 0.0,
40280
+ "step": 5753
40281
+ },
40282
+ {
40283
+ "epoch": 0.9677907661256412,
40284
+ "grad_norm": NaN,
40285
+ "learning_rate": 2.6324250001593664e-07,
40286
+ "loss": 0.0,
40287
+ "step": 5754
40288
+ },
40289
+ {
40290
+ "epoch": 0.9679589605584055,
40291
+ "grad_norm": NaN,
40292
+ "learning_rate": 2.604956396565328e-07,
40293
+ "loss": 0.0,
40294
+ "step": 5755
40295
+ },
40296
+ {
40297
+ "epoch": 0.9681271549911697,
40298
+ "grad_norm": NaN,
40299
+ "learning_rate": 2.577631484828147e-07,
40300
+ "loss": 0.0,
40301
+ "step": 5756
40302
+ },
40303
+ {
40304
+ "epoch": 0.968295349423934,
40305
+ "grad_norm": NaN,
40306
+ "learning_rate": 2.550450272841842e-07,
40307
+ "loss": 0.0,
40308
+ "step": 5757
40309
+ },
40310
+ {
40311
+ "epoch": 0.9684635438566983,
40312
+ "grad_norm": NaN,
40313
+ "learning_rate": 2.523412768458688e-07,
40314
+ "loss": 0.0,
40315
+ "step": 5758
40316
+ },
40317
+ {
40318
+ "epoch": 0.9686317382894626,
40319
+ "grad_norm": NaN,
40320
+ "learning_rate": 2.4965189794895485e-07,
40321
+ "loss": 0.0,
40322
+ "step": 5759
40323
+ },
40324
+ {
40325
+ "epoch": 0.9687999327222269,
40326
+ "grad_norm": NaN,
40327
+ "learning_rate": 2.469768913703707e-07,
40328
+ "loss": 0.0,
40329
+ "step": 5760
40330
+ },
40331
+ {
40332
+ "epoch": 0.9689681271549911,
40333
+ "grad_norm": NaN,
40334
+ "learning_rate": 2.443162578828928e-07,
40335
+ "loss": 0.0,
40336
+ "step": 5761
40337
+ },
40338
+ {
40339
+ "epoch": 0.9691363215877554,
40340
+ "grad_norm": NaN,
40341
+ "learning_rate": 2.4166999825515625e-07,
40342
+ "loss": 0.0,
40343
+ "step": 5762
40344
+ },
40345
+ {
40346
+ "epoch": 0.9693045160205197,
40347
+ "grad_norm": NaN,
40348
+ "learning_rate": 2.3903811325163283e-07,
40349
+ "loss": 0.0,
40350
+ "step": 5763
40351
+ },
40352
+ {
40353
+ "epoch": 0.969472710453284,
40354
+ "grad_norm": NaN,
40355
+ "learning_rate": 2.3642060363264217e-07,
40356
+ "loss": 0.0,
40357
+ "step": 5764
40358
+ },
40359
+ {
40360
+ "epoch": 0.9696409048860483,
40361
+ "grad_norm": NaN,
40362
+ "learning_rate": 2.3381747015435163e-07,
40363
+ "loss": 0.0,
40364
+ "step": 5765
40365
+ },
40366
+ {
40367
+ "epoch": 0.9698090993188125,
40368
+ "grad_norm": NaN,
40369
+ "learning_rate": 2.312287135687874e-07,
40370
+ "loss": 0.0,
40371
+ "step": 5766
40372
+ },
40373
+ {
40374
+ "epoch": 0.9699772937515768,
40375
+ "grad_norm": NaN,
40376
+ "learning_rate": 2.2865433462380125e-07,
40377
+ "loss": 0.0,
40378
+ "step": 5767
40379
+ },
40380
+ {
40381
+ "epoch": 0.9701454881843411,
40382
+ "grad_norm": NaN,
40383
+ "learning_rate": 2.2609433406310941e-07,
40384
+ "loss": 0.0,
40385
+ "step": 5768
40386
+ },
40387
+ {
40388
+ "epoch": 0.9703136826171054,
40389
+ "grad_norm": NaN,
40390
+ "learning_rate": 2.2354871262626477e-07,
40391
+ "loss": 0.0,
40392
+ "step": 5769
40393
+ },
40394
+ {
40395
+ "epoch": 0.9704818770498697,
40396
+ "grad_norm": NaN,
40397
+ "learning_rate": 2.2101747104866788e-07,
40398
+ "loss": 0.0,
40399
+ "step": 5770
40400
+ },
40401
+ {
40402
+ "epoch": 0.9706500714826339,
40403
+ "grad_norm": NaN,
40404
+ "learning_rate": 2.185006100615672e-07,
40405
+ "loss": 0.0,
40406
+ "step": 5771
40407
+ },
40408
+ {
40409
+ "epoch": 0.9708182659153982,
40410
+ "grad_norm": NaN,
40411
+ "learning_rate": 2.159981303920533e-07,
40412
+ "loss": 0.0,
40413
+ "step": 5772
40414
+ },
40415
+ {
40416
+ "epoch": 0.9709864603481625,
40417
+ "grad_norm": NaN,
40418
+ "learning_rate": 2.1351003276307014e-07,
40419
+ "loss": 0.0,
40420
+ "step": 5773
40421
+ },
40422
+ {
40423
+ "epoch": 0.9711546547809268,
40424
+ "grad_norm": NaN,
40425
+ "learning_rate": 2.1103631789339272e-07,
40426
+ "loss": 0.0,
40427
+ "step": 5774
40428
+ },
40429
+ {
40430
+ "epoch": 0.971322849213691,
40431
+ "grad_norm": NaN,
40432
+ "learning_rate": 2.0857698649766055e-07,
40433
+ "loss": 0.0,
40434
+ "step": 5775
40435
+ },
40436
+ {
40437
+ "epoch": 0.9714910436464553,
40438
+ "grad_norm": NaN,
40439
+ "learning_rate": 2.061320392863386e-07,
40440
+ "loss": 0.0,
40441
+ "step": 5776
40442
+ },
40443
+ {
40444
+ "epoch": 0.9716592380792196,
40445
+ "grad_norm": NaN,
40446
+ "learning_rate": 2.0370147696574526e-07,
40447
+ "loss": 0.0,
40448
+ "step": 5777
40449
+ },
40450
+ {
40451
+ "epoch": 0.9718274325119839,
40452
+ "grad_norm": NaN,
40453
+ "learning_rate": 2.012853002380466e-07,
40454
+ "loss": 0.0,
40455
+ "step": 5778
40456
+ },
40457
+ {
40458
+ "epoch": 0.9719956269447482,
40459
+ "grad_norm": NaN,
40460
+ "learning_rate": 1.988835098012509e-07,
40461
+ "loss": 0.0,
40462
+ "step": 5779
40463
+ },
40464
+ {
40465
+ "epoch": 0.9721638213775124,
40466
+ "grad_norm": NaN,
40467
+ "learning_rate": 1.9649610634919767e-07,
40468
+ "loss": 0.0,
40469
+ "step": 5780
40470
+ },
40471
+ {
40472
+ "epoch": 0.9723320158102767,
40473
+ "grad_norm": NaN,
40474
+ "learning_rate": 1.9412309057159073e-07,
40475
+ "loss": 0.0,
40476
+ "step": 5781
40477
+ },
40478
+ {
40479
+ "epoch": 0.972500210243041,
40480
+ "grad_norm": NaN,
40481
+ "learning_rate": 1.9176446315397056e-07,
40482
+ "loss": 0.0,
40483
+ "step": 5782
40484
+ },
40485
+ {
40486
+ "epoch": 0.9726684046758052,
40487
+ "grad_norm": NaN,
40488
+ "learning_rate": 1.894202247777088e-07,
40489
+ "loss": 0.0,
40490
+ "step": 5783
40491
+ },
40492
+ {
40493
+ "epoch": 0.9728365991085695,
40494
+ "grad_norm": NaN,
40495
+ "learning_rate": 1.8709037612003045e-07,
40496
+ "loss": 0.0,
40497
+ "step": 5784
40498
+ },
40499
+ {
40500
+ "epoch": 0.9730047935413337,
40501
+ "grad_norm": NaN,
40502
+ "learning_rate": 1.8477491785400813e-07,
40503
+ "loss": 0.0,
40504
+ "step": 5785
40505
+ },
40506
+ {
40507
+ "epoch": 0.973172987974098,
40508
+ "grad_norm": NaN,
40509
+ "learning_rate": 1.8247385064855127e-07,
40510
+ "loss": 0.0,
40511
+ "step": 5786
40512
+ },
40513
+ {
40514
+ "epoch": 0.9733411824068623,
40515
+ "grad_norm": NaN,
40516
+ "learning_rate": 1.8018717516841143e-07,
40517
+ "loss": 0.0,
40518
+ "step": 5787
40519
+ },
40520
+ {
40521
+ "epoch": 0.9735093768396266,
40522
+ "grad_norm": NaN,
40523
+ "learning_rate": 1.779148920741769e-07,
40524
+ "loss": 0.0,
40525
+ "step": 5788
40526
+ },
40527
+ {
40528
+ "epoch": 0.9736775712723909,
40529
+ "grad_norm": NaN,
40530
+ "learning_rate": 1.7565700202229473e-07,
40531
+ "loss": 0.0,
40532
+ "step": 5789
40533
+ },
40534
+ {
40535
+ "epoch": 0.9738457657051551,
40536
+ "grad_norm": NaN,
40537
+ "learning_rate": 1.7341350566504323e-07,
40538
+ "loss": 0.0,
40539
+ "step": 5790
40540
+ },
40541
+ {
40542
+ "epoch": 0.9740139601379194,
40543
+ "grad_norm": NaN,
40544
+ "learning_rate": 1.7118440365053722e-07,
40545
+ "loss": 0.0,
40546
+ "step": 5791
40547
+ },
40548
+ {
40549
+ "epoch": 0.9741821545706837,
40550
+ "grad_norm": NaN,
40551
+ "learning_rate": 1.6896969662273944e-07,
40552
+ "loss": 0.0,
40553
+ "step": 5792
40554
+ },
40555
+ {
40556
+ "epoch": 0.974350349003448,
40557
+ "grad_norm": NaN,
40558
+ "learning_rate": 1.6676938522146023e-07,
40559
+ "loss": 0.0,
40560
+ "step": 5793
40561
+ },
40562
+ {
40563
+ "epoch": 0.9745185434362122,
40564
+ "grad_norm": NaN,
40565
+ "learning_rate": 1.645834700823412e-07,
40566
+ "loss": 0.0,
40567
+ "step": 5794
40568
+ },
40569
+ {
40570
+ "epoch": 0.9746867378689765,
40571
+ "grad_norm": NaN,
40572
+ "learning_rate": 1.6241195183686608e-07,
40573
+ "loss": 0.0,
40574
+ "step": 5795
40575
+ },
40576
+ {
40577
+ "epoch": 0.9748549323017408,
40578
+ "grad_norm": NaN,
40579
+ "learning_rate": 1.6025483111236638e-07,
40580
+ "loss": 0.0,
40581
+ "step": 5796
40582
+ },
40583
+ {
40584
+ "epoch": 0.9750231267345051,
40585
+ "grad_norm": NaN,
40586
+ "learning_rate": 1.581121085320103e-07,
40587
+ "loss": 0.0,
40588
+ "step": 5797
40589
+ },
40590
+ {
40591
+ "epoch": 0.9751913211672694,
40592
+ "grad_norm": NaN,
40593
+ "learning_rate": 1.5598378471480267e-07,
40594
+ "loss": 0.0,
40595
+ "step": 5798
40596
+ },
40597
+ {
40598
+ "epoch": 0.9753595156000336,
40599
+ "grad_norm": NaN,
40600
+ "learning_rate": 1.5386986027559613e-07,
40601
+ "loss": 0.0,
40602
+ "step": 5799
40603
+ },
40604
+ {
40605
+ "epoch": 0.9755277100327979,
40606
+ "grad_norm": NaN,
40607
+ "learning_rate": 1.5177033582507993e-07,
40608
+ "loss": 0.0,
40609
+ "step": 5800
40610
+ },
40611
+ {
40612
+ "epoch": 0.9756959044655622,
40613
+ "grad_norm": NaN,
40614
+ "learning_rate": 1.4968521196978002e-07,
40615
+ "loss": 0.0,
40616
+ "step": 5801
40617
+ },
40618
+ {
40619
+ "epoch": 0.9758640988983265,
40620
+ "grad_norm": NaN,
40621
+ "learning_rate": 1.4761448931206455e-07,
40622
+ "loss": 0.0,
40623
+ "step": 5802
40624
+ },
40625
+ {
40626
+ "epoch": 0.9760322933310908,
40627
+ "grad_norm": NaN,
40628
+ "learning_rate": 1.4555816845014948e-07,
40629
+ "loss": 0.0,
40630
+ "step": 5803
40631
+ },
40632
+ {
40633
+ "epoch": 0.976200487763855,
40634
+ "grad_norm": NaN,
40635
+ "learning_rate": 1.4351624997807623e-07,
40636
+ "loss": 0.0,
40637
+ "step": 5804
40638
+ },
40639
+ {
40640
+ "epoch": 0.9763686821966193,
40641
+ "grad_norm": NaN,
40642
+ "learning_rate": 1.4148873448573408e-07,
40643
+ "loss": 0.0,
40644
+ "step": 5805
40645
+ },
40646
+ {
40647
+ "epoch": 0.9765368766293836,
40648
+ "grad_norm": NaN,
40649
+ "learning_rate": 1.3947562255884338e-07,
40650
+ "loss": 0.0,
40651
+ "step": 5806
40652
+ },
40653
+ {
40654
+ "epoch": 0.9767050710621479,
40655
+ "grad_norm": NaN,
40656
+ "learning_rate": 1.374769147789834e-07,
40657
+ "loss": 0.0,
40658
+ "step": 5807
40659
+ },
40660
+ {
40661
+ "epoch": 0.9768732654949122,
40662
+ "grad_norm": NaN,
40663
+ "learning_rate": 1.3549261172354777e-07,
40664
+ "loss": 0.0,
40665
+ "step": 5808
40666
+ },
40667
+ {
40668
+ "epoch": 0.9770414599276764,
40669
+ "grad_norm": NaN,
40670
+ "learning_rate": 1.3352271396577798e-07,
40671
+ "loss": 0.0,
40672
+ "step": 5809
40673
+ },
40674
+ {
40675
+ "epoch": 0.9772096543604407,
40676
+ "grad_norm": NaN,
40677
+ "learning_rate": 1.3156722207476324e-07,
40678
+ "loss": 0.0,
40679
+ "step": 5810
40680
+ },
40681
+ {
40682
+ "epoch": 0.977377848793205,
40683
+ "grad_norm": NaN,
40684
+ "learning_rate": 1.2962613661541834e-07,
40685
+ "loss": 0.0,
40686
+ "step": 5811
40687
+ },
40688
+ {
40689
+ "epoch": 0.9775460432259693,
40690
+ "grad_norm": NaN,
40691
+ "learning_rate": 1.2769945814850582e-07,
40692
+ "loss": 0.0,
40693
+ "step": 5812
40694
+ },
40695
+ {
40696
+ "epoch": 0.9777142376587334,
40697
+ "grad_norm": NaN,
40698
+ "learning_rate": 1.2578718723061378e-07,
40699
+ "loss": 0.0,
40700
+ "step": 5813
40701
+ },
40702
+ {
40703
+ "epoch": 0.9778824320914977,
40704
+ "grad_norm": NaN,
40705
+ "learning_rate": 1.2388932441418367e-07,
40706
+ "loss": 0.0,
40707
+ "step": 5814
40708
+ },
40709
+ {
40710
+ "epoch": 0.978050626524262,
40711
+ "grad_norm": NaN,
40712
+ "learning_rate": 1.220058702474769e-07,
40713
+ "loss": 0.0,
40714
+ "step": 5815
40715
+ },
40716
+ {
40717
+ "epoch": 0.9782188209570263,
40718
+ "grad_norm": NaN,
40719
+ "learning_rate": 1.2013682527461379e-07,
40720
+ "loss": 0.0,
40721
+ "step": 5816
40722
+ },
40723
+ {
40724
+ "epoch": 0.9783870153897906,
40725
+ "grad_norm": NaN,
40726
+ "learning_rate": 1.1828219003553465e-07,
40727
+ "loss": 0.0,
40728
+ "step": 5817
40729
+ },
40730
+ {
40731
+ "epoch": 0.9785552098225548,
40732
+ "grad_norm": NaN,
40733
+ "learning_rate": 1.16441965066022e-07,
40734
+ "loss": 0.0,
40735
+ "step": 5818
40736
+ },
40737
+ {
40738
+ "epoch": 0.9787234042553191,
40739
+ "grad_norm": NaN,
40740
+ "learning_rate": 1.1461615089770062e-07,
40741
+ "loss": 0.0,
40742
+ "step": 5819
40743
+ },
40744
+ {
40745
+ "epoch": 0.9788915986880834,
40746
+ "grad_norm": NaN,
40747
+ "learning_rate": 1.1280474805802632e-07,
40748
+ "loss": 0.0,
40749
+ "step": 5820
40750
+ },
40751
+ {
40752
+ "epoch": 0.9790597931208477,
40753
+ "grad_norm": NaN,
40754
+ "learning_rate": 1.110077570702861e-07,
40755
+ "loss": 0.0,
40756
+ "step": 5821
40757
+ },
40758
+ {
40759
+ "epoch": 0.979227987553612,
40760
+ "grad_norm": NaN,
40761
+ "learning_rate": 1.0922517845362023e-07,
40762
+ "loss": 0.0,
40763
+ "step": 5822
40764
+ },
40765
+ {
40766
+ "epoch": 0.9793961819863762,
40767
+ "grad_norm": NaN,
40768
+ "learning_rate": 1.0745701272298902e-07,
40769
+ "loss": 0.0,
40770
+ "step": 5823
40771
+ },
40772
+ {
40773
+ "epoch": 0.9795643764191405,
40774
+ "grad_norm": NaN,
40775
+ "learning_rate": 1.0570326038920053e-07,
40776
+ "loss": 0.0,
40777
+ "step": 5824
40778
+ },
40779
+ {
40780
+ "epoch": 0.9797325708519048,
40781
+ "grad_norm": NaN,
40782
+ "learning_rate": 1.0396392195889393e-07,
40783
+ "loss": 0.0,
40784
+ "step": 5825
40785
+ },
40786
+ {
40787
+ "epoch": 0.9799007652846691,
40788
+ "grad_norm": NaN,
40789
+ "learning_rate": 1.022389979345395e-07,
40790
+ "loss": 0.0,
40791
+ "step": 5826
40792
+ },
40793
+ {
40794
+ "epoch": 0.9800689597174334,
40795
+ "grad_norm": NaN,
40796
+ "learning_rate": 1.0052848881444976e-07,
40797
+ "loss": 0.0,
40798
+ "step": 5827
40799
+ },
40800
+ {
40801
+ "epoch": 0.9802371541501976,
40802
+ "grad_norm": NaN,
40803
+ "learning_rate": 9.883239509277942e-08,
40804
+ "loss": 0.0,
40805
+ "step": 5828
40806
+ },
40807
+ {
40808
+ "epoch": 0.9804053485829619,
40809
+ "grad_norm": NaN,
40810
+ "learning_rate": 9.715071725949765e-08,
40811
+ "loss": 0.0,
40812
+ "step": 5829
40813
+ },
40814
+ {
40815
+ "epoch": 0.9805735430157262,
40816
+ "grad_norm": NaN,
40817
+ "learning_rate": 9.54834558004325e-08,
40818
+ "loss": 0.0,
40819
+ "step": 5830
40820
+ },
40821
+ {
40822
+ "epoch": 0.9807417374484905,
40823
+ "grad_norm": NaN,
40824
+ "learning_rate": 9.383061119723757e-08,
40825
+ "loss": 0.0,
40826
+ "step": 5831
40827
+ },
40828
+ {
40829
+ "epoch": 0.9809099318812547,
40830
+ "grad_norm": NaN,
40831
+ "learning_rate": 9.219218392739759e-08,
40832
+ "loss": 0.0,
40833
+ "step": 5832
40834
+ },
40835
+ {
40836
+ "epoch": 0.981078126314019,
40837
+ "grad_norm": NaN,
40838
+ "learning_rate": 9.056817446422839e-08,
40839
+ "loss": 0.0,
40840
+ "step": 5833
40841
+ },
40842
+ {
40843
+ "epoch": 0.9812463207467833,
40844
+ "grad_norm": NaN,
40845
+ "learning_rate": 8.895858327690464e-08,
40846
+ "loss": 0.0,
40847
+ "step": 5834
40848
+ },
40849
+ {
40850
+ "epoch": 0.9814145151795476,
40851
+ "grad_norm": NaN,
40852
+ "learning_rate": 8.736341083041e-08,
40853
+ "loss": 0.0,
40854
+ "step": 5835
40855
+ },
40856
+ {
40857
+ "epoch": 0.9815827096123119,
40858
+ "grad_norm": NaN,
40859
+ "learning_rate": 8.578265758557024e-08,
40860
+ "loss": 0.0,
40861
+ "step": 5836
40862
+ },
40863
+ {
40864
+ "epoch": 0.9817509040450761,
40865
+ "grad_norm": NaN,
40866
+ "learning_rate": 8.421632399904788e-08,
40867
+ "loss": 0.0,
40868
+ "step": 5837
40869
+ },
40870
+ {
40871
+ "epoch": 0.9819190984778404,
40872
+ "grad_norm": NaN,
40873
+ "learning_rate": 8.266441052334206e-08,
40874
+ "loss": 0.0,
40875
+ "step": 5838
40876
+ },
40877
+ {
40878
+ "epoch": 0.9820872929106047,
40879
+ "grad_norm": NaN,
40880
+ "learning_rate": 8.112691760677749e-08,
40881
+ "loss": 0.0,
40882
+ "step": 5839
40883
+ },
40884
+ {
40885
+ "epoch": 0.982255487343369,
40886
+ "grad_norm": NaN,
40887
+ "learning_rate": 7.960384569353219e-08,
40888
+ "loss": 0.0,
40889
+ "step": 5840
40890
+ },
40891
+ {
40892
+ "epoch": 0.9824236817761333,
40893
+ "grad_norm": NaN,
40894
+ "learning_rate": 7.809519522358755e-08,
40895
+ "loss": 0.0,
40896
+ "step": 5841
40897
+ },
40898
+ {
40899
+ "epoch": 0.9825918762088974,
40900
+ "grad_norm": NaN,
40901
+ "learning_rate": 7.660096663278938e-08,
40902
+ "loss": 0.0,
40903
+ "step": 5842
40904
+ },
40905
+ {
40906
+ "epoch": 0.9827600706416617,
40907
+ "grad_norm": NaN,
40908
+ "learning_rate": 7.512116035279237e-08,
40909
+ "loss": 0.0,
40910
+ "step": 5843
40911
+ },
40912
+ {
40913
+ "epoch": 0.982928265074426,
40914
+ "grad_norm": NaN,
40915
+ "learning_rate": 7.365577681110458e-08,
40916
+ "loss": 0.0,
40917
+ "step": 5844
40918
+ },
40919
+ {
40920
+ "epoch": 0.9830964595071903,
40921
+ "grad_norm": NaN,
40922
+ "learning_rate": 7.220481643105403e-08,
40923
+ "loss": 0.0,
40924
+ "step": 5845
40925
+ },
40926
+ {
40927
+ "epoch": 0.9832646539399545,
40928
+ "grad_norm": NaN,
40929
+ "learning_rate": 7.076827963181099e-08,
40930
+ "loss": 0.0,
40931
+ "step": 5846
40932
+ },
40933
+ {
40934
+ "epoch": 0.9834328483727188,
40935
+ "grad_norm": NaN,
40936
+ "learning_rate": 6.934616682837125e-08,
40937
+ "loss": 0.0,
40938
+ "step": 5847
40939
+ },
40940
+ {
40941
+ "epoch": 0.9836010428054831,
40942
+ "grad_norm": NaN,
40943
+ "learning_rate": 6.79384784315673e-08,
40944
+ "loss": 0.0,
40945
+ "step": 5848
40946
+ },
40947
+ {
40948
+ "epoch": 0.9837692372382474,
40949
+ "grad_norm": NaN,
40950
+ "learning_rate": 6.65452148480683e-08,
40951
+ "loss": 0.0,
40952
+ "step": 5849
40953
+ },
40954
+ {
40955
+ "epoch": 0.9839374316710117,
40956
+ "grad_norm": NaN,
40957
+ "learning_rate": 6.516637648036894e-08,
40958
+ "loss": 0.0,
40959
+ "step": 5850
40960
+ },
40961
+ {
40962
+ "epoch": 0.9841056261037759,
40963
+ "grad_norm": NaN,
40964
+ "learning_rate": 6.380196372680058e-08,
40965
+ "loss": 0.0,
40966
+ "step": 5851
40967
+ },
40968
+ {
40969
+ "epoch": 0.9842738205365402,
40970
+ "grad_norm": NaN,
40971
+ "learning_rate": 6.245197698152571e-08,
40972
+ "loss": 0.0,
40973
+ "step": 5852
40974
+ },
40975
+ {
40976
+ "epoch": 0.9844420149693045,
40977
+ "grad_norm": NaN,
40978
+ "learning_rate": 6.111641663454903e-08,
40979
+ "loss": 0.0,
40980
+ "step": 5853
40981
+ },
40982
+ {
40983
+ "epoch": 0.9846102094020688,
40984
+ "grad_norm": NaN,
40985
+ "learning_rate": 5.979528307168414e-08,
40986
+ "loss": 0.0,
40987
+ "step": 5854
40988
+ },
40989
+ {
40990
+ "epoch": 0.9847784038348331,
40991
+ "grad_norm": NaN,
40992
+ "learning_rate": 5.84885766746035e-08,
40993
+ "loss": 0.0,
40994
+ "step": 5855
40995
+ },
40996
+ {
40997
+ "epoch": 0.9849465982675973,
40998
+ "grad_norm": NaN,
40999
+ "learning_rate": 5.7196297820794054e-08,
41000
+ "loss": 0.0,
41001
+ "step": 5856
41002
+ },
41003
+ {
41004
+ "epoch": 0.9851147927003616,
41005
+ "grad_norm": NaN,
41006
+ "learning_rate": 5.591844688358494e-08,
41007
+ "loss": 0.0,
41008
+ "step": 5857
41009
+ },
41010
+ {
41011
+ "epoch": 0.9852829871331259,
41012
+ "grad_norm": NaN,
41013
+ "learning_rate": 5.465502423213087e-08,
41014
+ "loss": 0.0,
41015
+ "step": 5858
41016
+ },
41017
+ {
41018
+ "epoch": 0.9854511815658902,
41019
+ "grad_norm": NaN,
41020
+ "learning_rate": 5.340603023141766e-08,
41021
+ "loss": 0.0,
41022
+ "step": 5859
41023
+ },
41024
+ {
41025
+ "epoch": 0.9856193759986545,
41026
+ "grad_norm": NaN,
41027
+ "learning_rate": 5.217146524226779e-08,
41028
+ "loss": 0.0,
41029
+ "step": 5860
41030
+ },
41031
+ {
41032
+ "epoch": 0.9857875704314187,
41033
+ "grad_norm": NaN,
41034
+ "learning_rate": 5.0951329621340416e-08,
41035
+ "loss": 0.0,
41036
+ "step": 5861
41037
+ },
41038
+ {
41039
+ "epoch": 0.985955764864183,
41040
+ "grad_norm": NaN,
41041
+ "learning_rate": 4.9745623721109135e-08,
41042
+ "loss": 0.0,
41043
+ "step": 5862
41044
+ },
41045
+ {
41046
+ "epoch": 0.9861239592969473,
41047
+ "grad_norm": NaN,
41048
+ "learning_rate": 4.855434788988977e-08,
41049
+ "loss": 0.0,
41050
+ "step": 5863
41051
+ },
41052
+ {
41053
+ "epoch": 0.9862921537297116,
41054
+ "grad_norm": NaN,
41055
+ "learning_rate": 4.737750247183481e-08,
41056
+ "loss": 0.0,
41057
+ "step": 5864
41058
+ },
41059
+ {
41060
+ "epoch": 0.9864603481624759,
41061
+ "grad_norm": NaN,
41062
+ "learning_rate": 4.621508780691119e-08,
41063
+ "loss": 0.0,
41064
+ "step": 5865
41065
+ },
41066
+ {
41067
+ "epoch": 0.9866285425952401,
41068
+ "grad_norm": NaN,
41069
+ "learning_rate": 4.506710423093918e-08,
41070
+ "loss": 0.0,
41071
+ "step": 5866
41072
+ },
41073
+ {
41074
+ "epoch": 0.9867967370280044,
41075
+ "grad_norm": NaN,
41076
+ "learning_rate": 4.39335520755535e-08,
41077
+ "loss": 0.0,
41078
+ "step": 5867
41079
+ },
41080
+ {
41081
+ "epoch": 0.9869649314607687,
41082
+ "grad_norm": NaN,
41083
+ "learning_rate": 4.281443166822552e-08,
41084
+ "loss": 0.0,
41085
+ "step": 5868
41086
+ },
41087
+ {
41088
+ "epoch": 0.987133125893533,
41089
+ "grad_norm": NaN,
41090
+ "learning_rate": 4.1709743332252196e-08,
41091
+ "loss": 0.0,
41092
+ "step": 5869
41093
+ },
41094
+ {
41095
+ "epoch": 0.9873013203262972,
41096
+ "grad_norm": NaN,
41097
+ "learning_rate": 4.061948738677268e-08,
41098
+ "loss": 0.0,
41099
+ "step": 5870
41100
+ },
41101
+ {
41102
+ "epoch": 0.9874695147590615,
41103
+ "grad_norm": NaN,
41104
+ "learning_rate": 3.9543664146746154e-08,
41105
+ "loss": 0.0,
41106
+ "step": 5871
41107
+ },
41108
+ {
41109
+ "epoch": 0.9876377091918257,
41110
+ "grad_norm": NaN,
41111
+ "learning_rate": 3.8482273922962884e-08,
41112
+ "loss": 0.0,
41113
+ "step": 5872
41114
+ },
41115
+ {
41116
+ "epoch": 0.98780590362459,
41117
+ "grad_norm": NaN,
41118
+ "learning_rate": 3.743531702204983e-08,
41119
+ "loss": 0.0,
41120
+ "step": 5873
41121
+ },
41122
+ {
41123
+ "epoch": 0.9879740980573543,
41124
+ "grad_norm": NaN,
41125
+ "learning_rate": 3.6402793746465045e-08,
41126
+ "loss": 0.0,
41127
+ "step": 5874
41128
+ },
41129
+ {
41130
+ "epoch": 0.9881422924901185,
41131
+ "grad_norm": NaN,
41132
+ "learning_rate": 3.538470439448105e-08,
41133
+ "loss": 0.0,
41134
+ "step": 5875
41135
+ },
41136
+ {
41137
+ "epoch": 0.9883104869228828,
41138
+ "grad_norm": NaN,
41139
+ "learning_rate": 3.438104926022923e-08,
41140
+ "loss": 0.0,
41141
+ "step": 5876
41142
+ },
41143
+ {
41144
+ "epoch": 0.9884786813556471,
41145
+ "grad_norm": NaN,
41146
+ "learning_rate": 3.339182863363877e-08,
41147
+ "loss": 0.0,
41148
+ "step": 5877
41149
+ },
41150
+ {
41151
+ "epoch": 0.9886468757884114,
41152
+ "grad_norm": NaN,
41153
+ "learning_rate": 3.241704280049218e-08,
41154
+ "loss": 0.0,
41155
+ "step": 5878
41156
+ },
41157
+ {
41158
+ "epoch": 0.9888150702211757,
41159
+ "grad_norm": NaN,
41160
+ "learning_rate": 3.145669204239754e-08,
41161
+ "loss": 0.0,
41162
+ "step": 5879
41163
+ },
41164
+ {
41165
+ "epoch": 0.9889832646539399,
41166
+ "grad_norm": NaN,
41167
+ "learning_rate": 3.051077663677737e-08,
41168
+ "loss": 0.0,
41169
+ "step": 5880
41170
+ },
41171
+ {
41172
+ "epoch": 0.9891514590867042,
41173
+ "grad_norm": NaN,
41174
+ "learning_rate": 2.9579296856907523e-08,
41175
+ "loss": 0.0,
41176
+ "step": 5881
41177
+ },
41178
+ {
41179
+ "epoch": 0.9893196535194685,
41180
+ "grad_norm": NaN,
41181
+ "learning_rate": 2.86622529718783e-08,
41182
+ "loss": 0.0,
41183
+ "step": 5882
41184
+ },
41185
+ {
41186
+ "epoch": 0.9894878479522328,
41187
+ "grad_norm": NaN,
41188
+ "learning_rate": 2.775964524661667e-08,
41189
+ "loss": 0.0,
41190
+ "step": 5883
41191
+ },
41192
+ {
41193
+ "epoch": 0.989656042384997,
41194
+ "grad_norm": NaN,
41195
+ "learning_rate": 2.6871473941864067e-08,
41196
+ "loss": 0.0,
41197
+ "step": 5884
41198
+ },
41199
+ {
41200
+ "epoch": 0.9898242368177613,
41201
+ "grad_norm": NaN,
41202
+ "learning_rate": 2.599773931422078e-08,
41203
+ "loss": 0.0,
41204
+ "step": 5885
41205
+ },
41206
+ {
41207
+ "epoch": 0.9899924312505256,
41208
+ "grad_norm": NaN,
41209
+ "learning_rate": 2.5138441616079367e-08,
41210
+ "loss": 0.0,
41211
+ "step": 5886
41212
+ },
41213
+ {
41214
+ "epoch": 0.9901606256832899,
41215
+ "grad_norm": NaN,
41216
+ "learning_rate": 2.4293581095696794e-08,
41217
+ "loss": 0.0,
41218
+ "step": 5887
41219
+ },
41220
+ {
41221
+ "epoch": 0.9903288201160542,
41222
+ "grad_norm": NaN,
41223
+ "learning_rate": 2.346315799713894e-08,
41224
+ "loss": 0.0,
41225
+ "step": 5888
41226
+ },
41227
+ {
41228
+ "epoch": 0.9904970145488184,
41229
+ "grad_norm": NaN,
41230
+ "learning_rate": 2.264717256030835e-08,
41231
+ "loss": 0.0,
41232
+ "step": 5889
41233
+ },
41234
+ {
41235
+ "epoch": 0.9906652089815827,
41236
+ "grad_norm": NaN,
41237
+ "learning_rate": 2.1845625020927572e-08,
41238
+ "loss": 0.0,
41239
+ "step": 5890
41240
+ },
41241
+ {
41242
+ "epoch": 0.990833403414347,
41243
+ "grad_norm": NaN,
41244
+ "learning_rate": 2.105851561056138e-08,
41245
+ "loss": 0.0,
41246
+ "step": 5891
41247
+ },
41248
+ {
41249
+ "epoch": 0.9910015978471113,
41250
+ "grad_norm": NaN,
41251
+ "learning_rate": 2.0285844556588996e-08,
41252
+ "loss": 0.0,
41253
+ "step": 5892
41254
+ },
41255
+ {
41256
+ "epoch": 0.9911697922798756,
41257
+ "grad_norm": NaN,
41258
+ "learning_rate": 1.952761208223186e-08,
41259
+ "loss": 0.0,
41260
+ "step": 5893
41261
+ },
41262
+ {
41263
+ "epoch": 0.9913379867126398,
41264
+ "grad_norm": NaN,
41265
+ "learning_rate": 1.878381840653698e-08,
41266
+ "loss": 0.0,
41267
+ "step": 5894
41268
+ },
41269
+ {
41270
+ "epoch": 0.9915061811454041,
41271
+ "grad_norm": NaN,
41272
+ "learning_rate": 1.8054463744376914e-08,
41273
+ "loss": 0.0,
41274
+ "step": 5895
41275
+ },
41276
+ {
41277
+ "epoch": 0.9916743755781684,
41278
+ "grad_norm": NaN,
41279
+ "learning_rate": 1.7339548306449794e-08,
41280
+ "loss": 0.0,
41281
+ "step": 5896
41282
+ },
41283
+ {
41284
+ "epoch": 0.9918425700109327,
41285
+ "grad_norm": NaN,
41286
+ "learning_rate": 1.6639072299284852e-08,
41287
+ "loss": 0.0,
41288
+ "step": 5897
41289
+ },
41290
+ {
41291
+ "epoch": 0.992010764443697,
41292
+ "grad_norm": NaN,
41293
+ "learning_rate": 1.5953035925253547e-08,
41294
+ "loss": 0.0,
41295
+ "step": 5898
41296
+ },
41297
+ {
41298
+ "epoch": 0.9921789588764612,
41299
+ "grad_norm": NaN,
41300
+ "learning_rate": 1.528143938253068e-08,
41301
+ "loss": 0.0,
41302
+ "step": 5899
41303
+ },
41304
+ {
41305
+ "epoch": 0.9923471533092255,
41306
+ "grad_norm": NaN,
41307
+ "learning_rate": 1.4624282865144389e-08,
41308
+ "loss": 0.0,
41309
+ "step": 5900
41310
+ },
41311
+ {
41312
+ "epoch": 0.9925153477419898,
41313
+ "grad_norm": NaN,
41314
+ "learning_rate": 1.3981566562931702e-08,
41315
+ "loss": 0.0,
41316
+ "step": 5901
41317
+ },
41318
+ {
41319
+ "epoch": 0.992683542174754,
41320
+ "grad_norm": NaN,
41321
+ "learning_rate": 1.3353290661571871e-08,
41322
+ "loss": 0.0,
41323
+ "step": 5902
41324
+ },
41325
+ {
41326
+ "epoch": 0.9928517366075182,
41327
+ "grad_norm": NaN,
41328
+ "learning_rate": 1.2739455342558603e-08,
41329
+ "loss": 0.0,
41330
+ "step": 5903
41331
+ },
41332
+ {
41333
+ "epoch": 0.9930199310402825,
41334
+ "grad_norm": NaN,
41335
+ "learning_rate": 1.2140060783227824e-08,
41336
+ "loss": 0.0,
41337
+ "step": 5904
41338
+ },
41339
+ {
41340
+ "epoch": 0.9931881254730468,
41341
+ "grad_norm": NaN,
41342
+ "learning_rate": 1.155510715674102e-08,
41343
+ "loss": 0.0,
41344
+ "step": 5905
41345
+ },
41346
+ {
41347
+ "epoch": 0.9933563199058111,
41348
+ "grad_norm": NaN,
41349
+ "learning_rate": 1.098459463207968e-08,
41350
+ "loss": 0.0,
41351
+ "step": 5906
41352
+ },
41353
+ {
41354
+ "epoch": 0.9935245143385754,
41355
+ "grad_norm": NaN,
41356
+ "learning_rate": 1.042852337406197e-08,
41357
+ "loss": 0.0,
41358
+ "step": 5907
41359
+ },
41360
+ {
41361
+ "epoch": 0.9936927087713396,
41362
+ "grad_norm": NaN,
41363
+ "learning_rate": 9.88689354332606e-09,
41364
+ "loss": 0.0,
41365
+ "step": 5908
41366
+ },
41367
+ {
41368
+ "epoch": 0.9938609032041039,
41369
+ "grad_norm": NaN,
41370
+ "learning_rate": 9.359705296346776e-09,
41371
+ "loss": 0.0,
41372
+ "step": 5909
41373
+ },
41374
+ {
41375
+ "epoch": 0.9940290976368682,
41376
+ "grad_norm": NaN,
41377
+ "learning_rate": 8.846958785418968e-09,
41378
+ "loss": 0.0,
41379
+ "step": 5910
41380
+ },
41381
+ {
41382
+ "epoch": 0.9941972920696325,
41383
+ "grad_norm": NaN,
41384
+ "learning_rate": 8.34865415867414e-09,
41385
+ "loss": 0.0,
41386
+ "step": 5911
41387
+ },
41388
+ {
41389
+ "epoch": 0.9943654865023968,
41390
+ "grad_norm": NaN,
41391
+ "learning_rate": 7.86479156006381e-09,
41392
+ "loss": 0.0,
41393
+ "step": 5912
41394
+ },
41395
+ {
41396
+ "epoch": 0.994533680935161,
41397
+ "grad_norm": NaN,
41398
+ "learning_rate": 7.3953711293706096e-09,
41399
+ "loss": 0.0,
41400
+ "step": 5913
41401
+ },
41402
+ {
41403
+ "epoch": 0.9947018753679253,
41404
+ "grad_norm": NaN,
41405
+ "learning_rate": 6.940393002202727e-09,
41406
+ "loss": 0.0,
41407
+ "step": 5914
41408
+ },
41409
+ {
41410
+ "epoch": 0.9948700698006896,
41411
+ "grad_norm": NaN,
41412
+ "learning_rate": 6.4998573100050195e-09,
41413
+ "loss": 0.0,
41414
+ "step": 5915
41415
+ },
41416
+ {
41417
+ "epoch": 0.9950382642334539,
41418
+ "grad_norm": NaN,
41419
+ "learning_rate": 6.0737641800368e-09,
41420
+ "loss": 0.0,
41421
+ "step": 5916
41422
+ },
41423
+ {
41424
+ "epoch": 0.9952064586662182,
41425
+ "grad_norm": NaN,
41426
+ "learning_rate": 5.662113735394048e-09,
41427
+ "loss": 0.0,
41428
+ "step": 5917
41429
+ },
41430
+ {
41431
+ "epoch": 0.9953746530989824,
41432
+ "grad_norm": NaN,
41433
+ "learning_rate": 5.264906095003852e-09,
41434
+ "loss": 0.0,
41435
+ "step": 5918
41436
+ },
41437
+ {
41438
+ "epoch": 0.9955428475317467,
41439
+ "grad_norm": NaN,
41440
+ "learning_rate": 4.8821413736022115e-09,
41441
+ "loss": 0.0,
41442
+ "step": 5919
41443
+ },
41444
+ {
41445
+ "epoch": 0.995711041964511,
41446
+ "grad_norm": NaN,
41447
+ "learning_rate": 4.51381968177289e-09,
41448
+ "loss": 0.0,
41449
+ "step": 5920
41450
+ },
41451
+ {
41452
+ "epoch": 0.9958792363972753,
41453
+ "grad_norm": NaN,
41454
+ "learning_rate": 4.159941125925215e-09,
41455
+ "loss": 0.0,
41456
+ "step": 5921
41457
+ },
41458
+ {
41459
+ "epoch": 0.9960474308300395,
41460
+ "grad_norm": NaN,
41461
+ "learning_rate": 3.820505808277419e-09,
41462
+ "loss": 0.0,
41463
+ "step": 5922
41464
+ },
41465
+ {
41466
+ "epoch": 0.9962156252628038,
41467
+ "grad_norm": NaN,
41468
+ "learning_rate": 3.4955138269010534e-09,
41469
+ "loss": 0.0,
41470
+ "step": 5923
41471
+ },
41472
+ {
41473
+ "epoch": 0.9963838196955681,
41474
+ "grad_norm": NaN,
41475
+ "learning_rate": 3.184965275676577e-09,
41476
+ "loss": 0.0,
41477
+ "step": 5924
41478
+ },
41479
+ {
41480
+ "epoch": 0.9965520141283324,
41481
+ "grad_norm": NaN,
41482
+ "learning_rate": 2.8888602443211122e-09,
41483
+ "loss": 0.0,
41484
+ "step": 5925
41485
+ },
41486
+ {
41487
+ "epoch": 0.9967202085610967,
41488
+ "grad_norm": NaN,
41489
+ "learning_rate": 2.607198818371792e-09,
41490
+ "loss": 0.0,
41491
+ "step": 5926
41492
+ },
41493
+ {
41494
+ "epoch": 0.996888402993861,
41495
+ "grad_norm": NaN,
41496
+ "learning_rate": 2.3399810792024133e-09,
41497
+ "loss": 0.0,
41498
+ "step": 5927
41499
+ },
41500
+ {
41501
+ "epoch": 0.9970565974266252,
41502
+ "grad_norm": NaN,
41503
+ "learning_rate": 2.087207104001232e-09,
41504
+ "loss": 0.0,
41505
+ "step": 5928
41506
+ },
41507
+ {
41508
+ "epoch": 0.9972247918593895,
41509
+ "grad_norm": NaN,
41510
+ "learning_rate": 1.8488769658042693e-09,
41511
+ "loss": 0.0,
41512
+ "step": 5929
41513
+ },
41514
+ {
41515
+ "epoch": 0.9973929862921538,
41516
+ "grad_norm": NaN,
41517
+ "learning_rate": 1.624990733450904e-09,
41518
+ "loss": 0.0,
41519
+ "step": 5930
41520
+ },
41521
+ {
41522
+ "epoch": 0.997561180724918,
41523
+ "grad_norm": NaN,
41524
+ "learning_rate": 1.4155484716227296e-09,
41525
+ "loss": 0.0,
41526
+ "step": 5931
41527
+ },
41528
+ {
41529
+ "epoch": 0.9977293751576822,
41530
+ "grad_norm": NaN,
41531
+ "learning_rate": 1.220550240826901e-09,
41532
+ "loss": 0.0,
41533
+ "step": 5932
41534
+ },
41535
+ {
41536
+ "epoch": 0.9978975695904465,
41537
+ "grad_norm": NaN,
41538
+ "learning_rate": 1.0399960974016854e-09,
41539
+ "loss": 0.0,
41540
+ "step": 5933
41541
+ },
41542
+ {
41543
+ "epoch": 0.9980657640232108,
41544
+ "grad_norm": NaN,
41545
+ "learning_rate": 8.738860934942584e-10,
41546
+ "loss": 0.0,
41547
+ "step": 5934
41548
+ },
41549
+ {
41550
+ "epoch": 0.9982339584559751,
41551
+ "grad_norm": NaN,
41552
+ "learning_rate": 7.222202770995612e-10,
41553
+ "loss": 0.0,
41554
+ "step": 5935
41555
+ },
41556
+ {
41557
+ "epoch": 0.9984021528887393,
41558
+ "grad_norm": NaN,
41559
+ "learning_rate": 5.849986920325456e-10,
41560
+ "loss": 0.0,
41561
+ "step": 5936
41562
+ },
41563
+ {
41564
+ "epoch": 0.9985703473215036,
41565
+ "grad_norm": NaN,
41566
+ "learning_rate": 4.622213779392759e-10,
41567
+ "loss": 0.0,
41568
+ "step": 5937
41569
+ },
41570
+ {
41571
+ "epoch": 0.9987385417542679,
41572
+ "grad_norm": NaN,
41573
+ "learning_rate": 3.538883702747242e-10,
41574
+ "loss": 0.0,
41575
+ "step": 5938
41576
+ },
41577
+ {
41578
+ "epoch": 0.9989067361870322,
41579
+ "grad_norm": NaN,
41580
+ "learning_rate": 2.599997003471799e-10,
41581
+ "loss": 0.0,
41582
+ "step": 5939
41583
+ },
41584
+ {
41585
+ "epoch": 0.9990749306197965,
41586
+ "grad_norm": NaN,
41587
+ "learning_rate": 1.8055539527939148e-10,
41588
+ "loss": 0.0,
41589
+ "step": 5940
41590
+ },
41591
+ {
41592
+ "epoch": 0.9992431250525607,
41593
+ "grad_norm": NaN,
41594
+ "learning_rate": 1.155554780141177e-10,
41595
+ "loss": 0.0,
41596
+ "step": 5941
41597
+ },
41598
+ {
41599
+ "epoch": 0.999411319485325,
41600
+ "grad_norm": NaN,
41601
+ "learning_rate": 6.499996733633218e-11,
41602
+ "loss": 0.0,
41603
+ "step": 5942
41604
+ },
41605
+ {
41606
+ "epoch": 0.9995795139180893,
41607
+ "grad_norm": NaN,
41608
+ "learning_rate": 2.8888877851018792e-11,
41609
+ "loss": 0.0,
41610
+ "step": 5943
41611
+ },
41612
+ {
41613
+ "epoch": 0.9997477083508536,
41614
+ "grad_norm": NaN,
41615
+ "learning_rate": 7.2222199831717406e-12,
41616
+ "loss": 0.0,
41617
+ "step": 5944
41618
+ },
41619
+ {
41620
+ "epoch": 0.9999159027836179,
41621
+ "grad_norm": NaN,
41622
+ "learning_rate": 0.0,
41623
+ "loss": 0.0,
41624
+ "step": 5945
41625
  }
41626
  ],
41627
  "logging_steps": 1,
 
41636
  "should_evaluate": false,
41637
  "should_log": false,
41638
  "should_save": true,
41639
+ "should_training_stop": true
41640
  },
41641
  "attributes": {}
41642
  }
41643
  },
41644
+ "total_flos": 1.1191100899590144e+17,
41645
  "train_batch_size": 8,
41646
  "trial_name": null,
41647
  "trial_params": null