diaenra committed (verified)
Commit 34a86d6 · 1 Parent(s): 9432848

Training in progress, step 6630, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9424ead24c1f9928d93dcca08f08662e27a0300efc062a3a829f95c3b8e226c6
+oid sha256:1aacc07d7a6d159824c1c0ee35e7479f76a36457267431cfb728017350d0a453
 size 377528296

last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d27d2e7252f6cdeeeabe5085916f4a1a350decae7f63cd39ae04981d36e05dc3
+oid sha256:e9218c11f6e8ac0dd6bfed5d5c509c181cb6ba9b71314da1beccfa02b35aac35
 size 755217530

last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:20d715ec9ae5a99a51d0f413b64f52f539737bb65299888f322d1b46910817b7
+oid sha256:4a9628c6d951d35b24588dd35ad0842eb2aa397f35a7d2e98b7ac2aa77eb0f12
 size 14244

last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0228103870c5c91f9e7c1c49686736ceb20668b7b5baf93d7127be66bdf65f06
+oid sha256:2efcaa017136af3f1586f7ffd299c32a1aacae942ad78e9c83f4627abad725ab
 size 1064
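
Each pointer above follows the git-lfs v1 spec, so the oid is the SHA-256 of the stored blob. A minimal sketch, assuming the checkpoint files have been downloaded locally under last-checkpoint/, to confirm a download matches the new adapter oid:

import hashlib
from pathlib import Path

def sha256_of(path: Path, chunk_size: int = 1 << 20) -> str:
    # Stream in 1 MiB chunks so large blobs (the ~377 MB adapter, ~755 MB optimizer) fit in memory.
    digest = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# New oid recorded above for adapter_model.safetensors; the local path is a hypothetical download location.
expected = "1aacc07d7a6d159824c1c0ee35e7479f76a36457267431cfb728017350d0a453"
local = Path("last-checkpoint/adapter_model.safetensors")
assert sha256_of(local) == expected, "local file does not match the LFS pointer oid"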
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.9733031674208145,
+  "epoch": 1.0,
   "eval_steps": 500,
-  "global_step": 6453,
+  "global_step": 6630,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -45178,6 +45178,1245 @@
   "learning_rate": 1.811746673659187e-07,
   "loss": 1.0547,
   "step": 6453
45181
+ },
45182
+ {
45183
+ "epoch": 0.9734539969834087,
45184
+ "grad_norm": 2.080857992172241,
45185
+ "learning_rate": 1.7913449877619558e-07,
45186
+ "loss": 1.5701,
45187
+ "step": 6454
45188
+ },
45189
+ {
45190
+ "epoch": 0.973604826546003,
45191
+ "grad_norm": 1.761729121208191,
45192
+ "learning_rate": 1.7710586165564093e-07,
45193
+ "loss": 1.0911,
45194
+ "step": 6455
45195
+ },
45196
+ {
45197
+ "epoch": 0.9737556561085973,
45198
+ "grad_norm": 1.5841683149337769,
45199
+ "learning_rate": 1.750887564738124e-07,
45200
+ "loss": 0.8346,
45201
+ "step": 6456
45202
+ },
45203
+ {
45204
+ "epoch": 0.9739064856711915,
45205
+ "grad_norm": 2.080693483352661,
45206
+ "learning_rate": 1.7308318369757548e-07,
45207
+ "loss": 1.078,
45208
+ "step": 6457
45209
+ },
45210
+ {
45211
+ "epoch": 0.9740573152337858,
45212
+ "grad_norm": 1.6733683347702026,
45213
+ "learning_rate": 1.7108914379114772e-07,
45214
+ "loss": 0.9827,
45215
+ "step": 6458
45216
+ },
45217
+ {
45218
+ "epoch": 0.9742081447963801,
45219
+ "grad_norm": 1.8590120077133179,
45220
+ "learning_rate": 1.691066372160599e-07,
45221
+ "loss": 0.9611,
45222
+ "step": 6459
45223
+ },
45224
+ {
45225
+ "epoch": 0.9743589743589743,
45226
+ "grad_norm": 2.000999927520752,
45227
+ "learning_rate": 1.6713566443117833e-07,
45228
+ "loss": 1.2743,
45229
+ "step": 6460
45230
+ },
45231
+ {
45232
+ "epoch": 0.9745098039215686,
45233
+ "grad_norm": 1.6481130123138428,
45234
+ "learning_rate": 1.651762258927103e-07,
45235
+ "loss": 0.8385,
45236
+ "step": 6461
45237
+ },
45238
+ {
45239
+ "epoch": 0.9746606334841629,
45240
+ "grad_norm": 1.4775646924972534,
45241
+ "learning_rate": 1.6322832205417637e-07,
45242
+ "loss": 0.6422,
45243
+ "step": 6462
45244
+ },
45245
+ {
45246
+ "epoch": 0.9748114630467571,
45247
+ "grad_norm": 1.965220332145691,
45248
+ "learning_rate": 1.612919533664381e-07,
45249
+ "loss": 1.1127,
45250
+ "step": 6463
45251
+ },
45252
+ {
45253
+ "epoch": 0.9749622926093514,
45254
+ "grad_norm": 1.770487904548645,
45255
+ "learning_rate": 1.5936712027768695e-07,
45256
+ "loss": 0.9725,
45257
+ "step": 6464
45258
+ },
45259
+ {
45260
+ "epoch": 0.9751131221719457,
45261
+ "grad_norm": 1.8315738439559937,
45262
+ "learning_rate": 1.5745382323343883e-07,
45263
+ "loss": 0.9438,
45264
+ "step": 6465
45265
+ },
45266
+ {
45267
+ "epoch": 0.97526395173454,
45268
+ "grad_norm": 1.8729884624481201,
45269
+ "learning_rate": 1.5555206267655055e-07,
45270
+ "loss": 0.919,
45271
+ "step": 6466
45272
+ },
45273
+ {
45274
+ "epoch": 0.9754147812971342,
45275
+ "grad_norm": 2.053765058517456,
45276
+ "learning_rate": 1.5366183904719222e-07,
45277
+ "loss": 1.1069,
45278
+ "step": 6467
45279
+ },
45280
+ {
45281
+ "epoch": 0.9755656108597285,
45282
+ "grad_norm": 1.6839706897735596,
45283
+ "learning_rate": 1.5178315278287502e-07,
45284
+ "loss": 0.975,
45285
+ "step": 6468
45286
+ },
45287
+ {
45288
+ "epoch": 0.9757164404223228,
45289
+ "grad_norm": 1.818885087966919,
45290
+ "learning_rate": 1.4991600431843443e-07,
45291
+ "loss": 0.914,
45292
+ "step": 6469
45293
+ },
45294
+ {
45295
+ "epoch": 0.975867269984917,
45296
+ "grad_norm": 1.8105655908584595,
45297
+ "learning_rate": 1.48060394086047e-07,
45298
+ "loss": 0.9206,
45299
+ "step": 6470
45300
+ },
45301
+ {
45302
+ "epoch": 0.9760180995475113,
45303
+ "grad_norm": 2.1190874576568604,
45304
+ "learning_rate": 1.462163225151969e-07,
45305
+ "loss": 1.3382,
45306
+ "step": 6471
45307
+ },
45308
+ {
45309
+ "epoch": 0.9761689291101056,
45310
+ "grad_norm": 1.9208011627197266,
45311
+ "learning_rate": 1.4438379003272605e-07,
45312
+ "loss": 0.9099,
45313
+ "step": 6472
45314
+ },
45315
+ {
45316
+ "epoch": 0.9763197586726998,
45317
+ "grad_norm": 1.6620832681655884,
45318
+ "learning_rate": 1.4256279706277299e-07,
45319
+ "loss": 0.8732,
45320
+ "step": 6473
45321
+ },
45322
+ {
45323
+ "epoch": 0.9764705882352941,
45324
+ "grad_norm": 2.034463405609131,
45325
+ "learning_rate": 1.4075334402683937e-07,
45326
+ "loss": 0.9726,
45327
+ "step": 6474
45328
+ },
45329
+ {
45330
+ "epoch": 0.9766214177978884,
45331
+ "grad_norm": 1.8176192045211792,
45332
+ "learning_rate": 1.3895543134372358e-07,
45333
+ "loss": 1.0147,
45334
+ "step": 6475
45335
+ },
45336
+ {
45337
+ "epoch": 0.9767722473604826,
45338
+ "grad_norm": 1.8768322467803955,
45339
+ "learning_rate": 1.3716905942957602e-07,
45340
+ "loss": 0.9693,
45341
+ "step": 6476
45342
+ },
45343
+ {
45344
+ "epoch": 0.9769230769230769,
45345
+ "grad_norm": 1.8389067649841309,
45346
+ "learning_rate": 1.35394228697866e-07,
45347
+ "loss": 1.0102,
45348
+ "step": 6477
45349
+ },
45350
+ {
45351
+ "epoch": 0.9770739064856712,
45352
+ "grad_norm": 2.1269052028656006,
45353
+ "learning_rate": 1.3363093955939266e-07,
45354
+ "loss": 1.0953,
45355
+ "step": 6478
45356
+ },
45357
+ {
45358
+ "epoch": 0.9772247360482654,
45359
+ "grad_norm": 2.317656993865967,
45360
+ "learning_rate": 1.3187919242229063e-07,
45361
+ "loss": 1.18,
45362
+ "step": 6479
45363
+ },
45364
+ {
45365
+ "epoch": 0.9773755656108597,
45366
+ "grad_norm": 1.6912294626235962,
45367
+ "learning_rate": 1.3013898769200784e-07,
45368
+ "loss": 0.8096,
45369
+ "step": 6480
45370
+ },
45371
+ {
45372
+ "epoch": 0.977526395173454,
45373
+ "grad_norm": 1.921573281288147,
45374
+ "learning_rate": 1.2841032577133317e-07,
45375
+ "loss": 1.0757,
45376
+ "step": 6481
45377
+ },
45378
+ {
45379
+ "epoch": 0.9776772247360482,
45380
+ "grad_norm": 1.7856738567352295,
45381
+ "learning_rate": 1.2669320706037991e-07,
45382
+ "loss": 0.9704,
45383
+ "step": 6482
45384
+ },
45385
+ {
45386
+ "epoch": 0.9778280542986425,
45387
+ "grad_norm": 2.447734832763672,
45388
+ "learning_rate": 1.2498763195659125e-07,
45389
+ "loss": 1.2028,
45390
+ "step": 6483
45391
+ },
45392
+ {
45393
+ "epoch": 0.9779788838612368,
45394
+ "grad_norm": 1.6459358930587769,
45395
+ "learning_rate": 1.2329360085473472e-07,
45396
+ "loss": 0.7704,
45397
+ "step": 6484
45398
+ },
45399
+ {
45400
+ "epoch": 0.978129713423831,
45401
+ "grad_norm": 2.0553648471832275,
45402
+ "learning_rate": 1.2161111414691896e-07,
45403
+ "loss": 1.124,
45404
+ "step": 6485
45405
+ },
45406
+ {
45407
+ "epoch": 0.9782805429864253,
45408
+ "grad_norm": 1.5967365503311157,
45409
+ "learning_rate": 1.1994017222255461e-07,
45410
+ "loss": 0.8144,
45411
+ "step": 6486
45412
+ },
45413
+ {
45414
+ "epoch": 0.9784313725490196,
45415
+ "grad_norm": 1.9038810729980469,
45416
+ "learning_rate": 1.1828077546840455e-07,
45417
+ "loss": 0.9251,
45418
+ "step": 6487
45419
+ },
45420
+ {
45421
+ "epoch": 0.9785822021116138,
45422
+ "grad_norm": 1.8847215175628662,
45423
+ "learning_rate": 1.1663292426854489e-07,
45424
+ "loss": 0.9066,
45425
+ "step": 6488
45426
+ },
45427
+ {
45428
+ "epoch": 0.9787330316742081,
45429
+ "grad_norm": 2.5748424530029297,
45430
+ "learning_rate": 1.1499661900439274e-07,
45431
+ "loss": 1.0472,
45432
+ "step": 6489
45433
+ },
45434
+ {
45435
+ "epoch": 0.9788838612368024,
45436
+ "grad_norm": 2.0933051109313965,
45437
+ "learning_rate": 1.1337186005467848e-07,
45438
+ "loss": 1.2256,
45439
+ "step": 6490
45440
+ },
45441
+ {
45442
+ "epoch": 0.9790346907993966,
45443
+ "grad_norm": 2.030015707015991,
45444
+ "learning_rate": 1.1175864779547351e-07,
45445
+ "loss": 0.8935,
45446
+ "step": 6491
45447
+ },
45448
+ {
45449
+ "epoch": 0.9791855203619909,
45450
+ "grad_norm": 2.0891942977905273,
45451
+ "learning_rate": 1.101569826001625e-07,
45452
+ "loss": 1.0113,
45453
+ "step": 6492
45454
+ },
45455
+ {
45456
+ "epoch": 0.9793363499245852,
45457
+ "grad_norm": 2.1114308834075928,
45458
+ "learning_rate": 1.0856686483946555e-07,
45459
+ "loss": 1.0275,
45460
+ "step": 6493
45461
+ },
45462
+ {
45463
+ "epoch": 0.9794871794871794,
45464
+ "grad_norm": 2.236339569091797,
45465
+ "learning_rate": 1.0698829488143269e-07,
45466
+ "loss": 1.2783,
45467
+ "step": 6494
45468
+ },
45469
+ {
45470
+ "epoch": 0.9796380090497737,
45471
+ "grad_norm": 2.22591233253479,
45472
+ "learning_rate": 1.0542127309143834e-07,
45473
+ "loss": 1.1781,
45474
+ "step": 6495
45475
+ },
45476
+ {
45477
+ "epoch": 0.979788838612368,
45478
+ "grad_norm": 1.9555222988128662,
45479
+ "learning_rate": 1.0386579983217571e-07,
45480
+ "loss": 0.8283,
45481
+ "step": 6496
45482
+ },
45483
+ {
45484
+ "epoch": 0.9799396681749623,
45485
+ "grad_norm": 1.7273701429367065,
45486
+ "learning_rate": 1.02321875463679e-07,
45487
+ "loss": 0.6755,
45488
+ "step": 6497
45489
+ },
45490
+ {
45491
+ "epoch": 0.9800904977375565,
45492
+ "grad_norm": 1.6880528926849365,
45493
+ "learning_rate": 1.0078950034330681e-07,
45494
+ "loss": 0.7478,
45495
+ "step": 6498
45496
+ },
45497
+ {
45498
+ "epoch": 0.9802413273001508,
45499
+ "grad_norm": 1.6948614120483398,
45500
+ "learning_rate": 9.926867482573099e-08,
45501
+ "loss": 0.8543,
45502
+ "step": 6499
45503
+ },
45504
+ {
45505
+ "epoch": 0.9803921568627451,
45506
+ "grad_norm": 1.8066236972808838,
45507
+ "learning_rate": 9.77593992629644e-08,
45508
+ "loss": 0.7576,
45509
+ "step": 6500
45510
+ },
45511
+ {
45512
+ "epoch": 0.9805429864253393,
45513
+ "grad_norm": 1.5170810222625732,
45514
+ "learning_rate": 9.626167400433872e-08,
45515
+ "loss": 0.8879,
45516
+ "step": 6501
45517
+ },
45518
+ {
45519
+ "epoch": 0.9806938159879336,
45520
+ "grad_norm": 1.8861565589904785,
45521
+ "learning_rate": 9.477549939652108e-08,
45522
+ "loss": 1.2489,
45523
+ "step": 6502
45524
+ },
45525
+ {
45526
+ "epoch": 0.9808446455505279,
45527
+ "grad_norm": 2.005974054336548,
45528
+ "learning_rate": 9.330087578349745e-08,
45529
+ "loss": 1.0192,
45530
+ "step": 6503
45531
+ },
45532
+ {
45533
+ "epoch": 0.9809954751131221,
45534
+ "grad_norm": 1.9359047412872314,
45535
+ "learning_rate": 9.183780350657812e-08,
45536
+ "loss": 1.1492,
45537
+ "step": 6504
45538
+ },
45539
+ {
45540
+ "epoch": 0.9811463046757164,
45541
+ "grad_norm": 1.7721647024154663,
45542
+ "learning_rate": 9.038628290440887e-08,
45543
+ "loss": 0.941,
45544
+ "step": 6505
45545
+ },
45546
+ {
45547
+ "epoch": 0.9812971342383107,
45548
+ "grad_norm": 1.5403116941452026,
45549
+ "learning_rate": 8.89463143129543e-08,
45550
+ "loss": 0.8233,
45551
+ "step": 6506
45552
+ },
45553
+ {
45554
+ "epoch": 0.9814479638009049,
45555
+ "grad_norm": 1.6215004920959473,
45556
+ "learning_rate": 8.751789806550892e-08,
45557
+ "loss": 0.7563,
45558
+ "step": 6507
45559
+ },
45560
+ {
45561
+ "epoch": 0.9815987933634992,
45562
+ "grad_norm": 2.063256025314331,
45563
+ "learning_rate": 8.610103449268603e-08,
45564
+ "loss": 1.3814,
45565
+ "step": 6508
45566
+ },
45567
+ {
45568
+ "epoch": 0.9817496229260935,
45569
+ "grad_norm": 1.958470344543457,
45570
+ "learning_rate": 8.469572392243996e-08,
45571
+ "loss": 1.3061,
45572
+ "step": 6509
45573
+ },
45574
+ {
45575
+ "epoch": 0.9819004524886877,
45576
+ "grad_norm": 1.8253540992736816,
45577
+ "learning_rate": 8.330196668003831e-08,
45578
+ "loss": 1.1178,
45579
+ "step": 6510
45580
+ },
45581
+ {
45582
+ "epoch": 0.982051282051282,
45583
+ "grad_norm": 2.160386323928833,
45584
+ "learning_rate": 8.191976308807858e-08,
45585
+ "loss": 1.3507,
45586
+ "step": 6511
45587
+ },
45588
+ {
45589
+ "epoch": 0.9822021116138763,
45590
+ "grad_norm": 1.7865500450134277,
45591
+ "learning_rate": 8.054911346647709e-08,
45592
+ "loss": 1.0007,
45593
+ "step": 6512
45594
+ },
45595
+ {
45596
+ "epoch": 0.9823529411764705,
45597
+ "grad_norm": 1.9779447317123413,
45598
+ "learning_rate": 7.919001813249671e-08,
45599
+ "loss": 1.1617,
45600
+ "step": 6513
45601
+ },
45602
+ {
45603
+ "epoch": 0.9825037707390648,
45604
+ "grad_norm": 1.7435057163238525,
45605
+ "learning_rate": 7.784247740069694e-08,
45606
+ "loss": 0.8858,
45607
+ "step": 6514
45608
+ },
45609
+ {
45610
+ "epoch": 0.9826546003016591,
45611
+ "grad_norm": 1.9248754978179932,
45612
+ "learning_rate": 7.650649158298384e-08,
45613
+ "loss": 1.0385,
45614
+ "step": 6515
45615
+ },
45616
+ {
45617
+ "epoch": 0.9828054298642533,
45618
+ "grad_norm": 1.8311914205551147,
45619
+ "learning_rate": 7.518206098858782e-08,
45620
+ "loss": 0.8934,
45621
+ "step": 6516
45622
+ },
45623
+ {
45624
+ "epoch": 0.9829562594268476,
45625
+ "grad_norm": 1.7870116233825684,
45626
+ "learning_rate": 7.386918592405256e-08,
45627
+ "loss": 1.0486,
45628
+ "step": 6517
45629
+ },
45630
+ {
45631
+ "epoch": 0.9831070889894419,
45632
+ "grad_norm": 2.1831729412078857,
45633
+ "learning_rate": 7.256786669325721e-08,
45634
+ "loss": 1.2173,
45635
+ "step": 6518
45636
+ },
45637
+ {
45638
+ "epoch": 0.9832579185520361,
45639
+ "grad_norm": 1.9115618467330933,
45640
+ "learning_rate": 7.127810359740527e-08,
45641
+ "loss": 0.8471,
45642
+ "step": 6519
45643
+ },
45644
+ {
45645
+ "epoch": 0.9834087481146304,
45646
+ "grad_norm": 2.1784827709198,
45647
+ "learning_rate": 6.999989693501908e-08,
45648
+ "loss": 1.3449,
45649
+ "step": 6520
45650
+ },
45651
+ {
45652
+ "epoch": 0.9835595776772247,
45653
+ "grad_norm": 1.8847562074661255,
45654
+ "learning_rate": 6.873324700195083e-08,
45655
+ "loss": 1.0211,
45656
+ "step": 6521
45657
+ },
45658
+ {
45659
+ "epoch": 0.983710407239819,
45660
+ "grad_norm": 2.0372416973114014,
45661
+ "learning_rate": 6.74781540913827e-08,
45662
+ "loss": 1.1225,
45663
+ "step": 6522
45664
+ },
45665
+ {
45666
+ "epoch": 0.9838612368024132,
45667
+ "grad_norm": 1.8074803352355957,
45668
+ "learning_rate": 6.623461849381563e-08,
45669
+ "loss": 0.942,
45670
+ "step": 6523
45671
+ },
45672
+ {
45673
+ "epoch": 0.9840120663650075,
45674
+ "grad_norm": 1.759004831314087,
45675
+ "learning_rate": 6.50026404970694e-08,
45676
+ "loss": 1.0119,
45677
+ "step": 6524
45678
+ },
45679
+ {
45680
+ "epoch": 0.9841628959276018,
45681
+ "grad_norm": 1.897386074066162,
45682
+ "learning_rate": 6.378222038630477e-08,
45683
+ "loss": 1.1226,
45684
+ "step": 6525
45685
+ },
45686
+ {
45687
+ "epoch": 0.984313725490196,
45688
+ "grad_norm": 1.8542094230651855,
45689
+ "learning_rate": 6.257335844399581e-08,
45690
+ "loss": 0.9392,
45691
+ "step": 6526
45692
+ },
45693
+ {
45694
+ "epoch": 0.9844645550527904,
45695
+ "grad_norm": 1.5449867248535156,
45696
+ "learning_rate": 6.137605494994092e-08,
45697
+ "loss": 0.6639,
45698
+ "step": 6527
45699
+ },
45700
+ {
45701
+ "epoch": 0.9846153846153847,
45702
+ "grad_norm": 1.8442318439483643,
45703
+ "learning_rate": 6.019031018126841e-08,
45704
+ "loss": 0.9855,
45705
+ "step": 6528
45706
+ },
45707
+ {
45708
+ "epoch": 0.9847662141779789,
45709
+ "grad_norm": 2.0847725868225098,
45710
+ "learning_rate": 5.9016124412430987e-08,
45711
+ "loss": 1.1598,
45712
+ "step": 6529
45713
+ },
45714
+ {
45715
+ "epoch": 0.9849170437405732,
45716
+ "grad_norm": 1.7849246263504028,
45717
+ "learning_rate": 5.785349791520012e-08,
45718
+ "loss": 0.9352,
45719
+ "step": 6530
45720
+ },
45721
+ {
45722
+ "epoch": 0.9850678733031675,
45723
+ "grad_norm": 1.9082951545715332,
45724
+ "learning_rate": 5.670243095867722e-08,
45725
+ "loss": 0.911,
45726
+ "step": 6531
45727
+ },
45728
+ {
45729
+ "epoch": 0.9852187028657617,
45730
+ "grad_norm": 1.6478004455566406,
45731
+ "learning_rate": 5.5562923809293624e-08,
45732
+ "loss": 0.7075,
45733
+ "step": 6532
45734
+ },
45735
+ {
45736
+ "epoch": 0.985369532428356,
45737
+ "grad_norm": 2.0368950366973877,
45738
+ "learning_rate": 5.4434976730788346e-08,
45739
+ "loss": 1.2577,
45740
+ "step": 6533
45741
+ },
45742
+ {
45743
+ "epoch": 0.9855203619909503,
45744
+ "grad_norm": 2.204418897628784,
45745
+ "learning_rate": 5.331858998423589e-08,
45746
+ "loss": 1.4991,
45747
+ "step": 6534
45748
+ },
45749
+ {
45750
+ "epoch": 0.9856711915535445,
45751
+ "grad_norm": 1.966247797012329,
45752
+ "learning_rate": 5.221376382803511e-08,
45753
+ "loss": 1.0373,
45754
+ "step": 6535
45755
+ },
45756
+ {
45757
+ "epoch": 0.9858220211161388,
45758
+ "grad_norm": 1.7758862972259521,
45759
+ "learning_rate": 5.1120498517914785e-08,
45760
+ "loss": 0.9066,
45761
+ "step": 6536
45762
+ },
45763
+ {
45764
+ "epoch": 0.9859728506787331,
45765
+ "grad_norm": 1.944427251815796,
45766
+ "learning_rate": 5.0038794306905834e-08,
45767
+ "loss": 1.1688,
45768
+ "step": 6537
45769
+ },
45770
+ {
45771
+ "epoch": 0.9861236802413273,
45772
+ "grad_norm": 1.944295048713684,
45773
+ "learning_rate": 4.896865144539131e-08,
45774
+ "loss": 0.981,
45775
+ "step": 6538
45776
+ },
45777
+ {
45778
+ "epoch": 0.9862745098039216,
45779
+ "grad_norm": 1.916852593421936,
45780
+ "learning_rate": 4.7910070181061974e-08,
45781
+ "loss": 1.1284,
45782
+ "step": 6539
45783
+ },
45784
+ {
45785
+ "epoch": 0.9864253393665159,
45786
+ "grad_norm": 2.07956862449646,
45787
+ "learning_rate": 4.686305075892738e-08,
45788
+ "loss": 1.198,
45789
+ "step": 6540
45790
+ },
45791
+ {
45792
+ "epoch": 0.9865761689291102,
45793
+ "grad_norm": 2.1623141765594482,
45794
+ "learning_rate": 4.5827593421338134e-08,
45795
+ "loss": 1.3789,
45796
+ "step": 6541
45797
+ },
45798
+ {
45799
+ "epoch": 0.9867269984917044,
45800
+ "grad_norm": 2.1320431232452393,
45801
+ "learning_rate": 4.480369840795806e-08,
45802
+ "loss": 1.2926,
45803
+ "step": 6542
45804
+ },
45805
+ {
45806
+ "epoch": 0.9868778280542987,
45807
+ "grad_norm": 2.1627719402313232,
45808
+ "learning_rate": 4.379136595577537e-08,
45809
+ "loss": 1.1043,
45810
+ "step": 6543
45811
+ },
45812
+ {
45813
+ "epoch": 0.987028657616893,
45814
+ "grad_norm": 2.2448055744171143,
45815
+ "learning_rate": 4.2790596299102646e-08,
45816
+ "loss": 1.14,
45817
+ "step": 6544
45818
+ },
45819
+ {
45820
+ "epoch": 0.9871794871794872,
45821
+ "grad_norm": 1.8780425786972046,
45822
+ "learning_rate": 4.1801389669576805e-08,
45823
+ "loss": 0.9128,
45824
+ "step": 6545
45825
+ },
45826
+ {
45827
+ "epoch": 0.9873303167420815,
45828
+ "grad_norm": 2.0032410621643066,
45829
+ "learning_rate": 4.082374629615915e-08,
45830
+ "loss": 0.9716,
45831
+ "step": 6546
45832
+ },
45833
+ {
45834
+ "epoch": 0.9874811463046758,
45835
+ "grad_norm": 1.782638669013977,
45836
+ "learning_rate": 3.985766640513533e-08,
45837
+ "loss": 0.8816,
45838
+ "step": 6547
45839
+ },
45840
+ {
45841
+ "epoch": 0.98763197586727,
45842
+ "grad_norm": 1.7118865251541138,
45843
+ "learning_rate": 3.890315022010982e-08,
45844
+ "loss": 0.7538,
45845
+ "step": 6548
45846
+ },
45847
+ {
45848
+ "epoch": 0.9877828054298643,
45849
+ "grad_norm": 1.3487775325775146,
45850
+ "learning_rate": 3.7960197962011447e-08,
45851
+ "loss": 0.5478,
45852
+ "step": 6549
45853
+ },
45854
+ {
45855
+ "epoch": 0.9879336349924586,
45856
+ "grad_norm": 1.5120551586151123,
45857
+ "learning_rate": 3.7028809849098955e-08,
45858
+ "loss": 0.578,
45859
+ "step": 6550
45860
+ },
45861
+ {
45862
+ "epoch": 0.9880844645550528,
45863
+ "grad_norm": 1.6499603986740112,
45864
+ "learning_rate": 3.610898609694991e-08,
45865
+ "loss": 0.9084,
45866
+ "step": 6551
45867
+ },
45868
+ {
45869
+ "epoch": 0.9882352941176471,
45870
+ "grad_norm": 2.073840379714966,
45871
+ "learning_rate": 3.520072691846621e-08,
45872
+ "loss": 1.3946,
45873
+ "step": 6552
45874
+ },
45875
+ {
45876
+ "epoch": 0.9883861236802414,
45877
+ "grad_norm": 2.301109552383423,
45878
+ "learning_rate": 3.43040325238686e-08,
45879
+ "loss": 1.5888,
45880
+ "step": 6553
45881
+ },
45882
+ {
45883
+ "epoch": 0.9885369532428356,
45884
+ "grad_norm": 2.0262253284454346,
45885
+ "learning_rate": 3.341890312070772e-08,
45886
+ "loss": 1.198,
45887
+ "step": 6554
45888
+ },
45889
+ {
45890
+ "epoch": 0.9886877828054299,
45891
+ "grad_norm": 1.9711953401565552,
45892
+ "learning_rate": 3.254533891385303e-08,
45893
+ "loss": 0.998,
45894
+ "step": 6555
45895
+ },
45896
+ {
45897
+ "epoch": 0.9888386123680242,
45898
+ "grad_norm": 1.9102327823638916,
45899
+ "learning_rate": 3.168334010549834e-08,
45900
+ "loss": 1.16,
45901
+ "step": 6556
45902
+ },
45903
+ {
45904
+ "epoch": 0.9889894419306184,
45905
+ "grad_norm": 1.8268523216247559,
45906
+ "learning_rate": 3.083290689516183e-08,
45907
+ "loss": 1.085,
45908
+ "step": 6557
45909
+ },
45910
+ {
45911
+ "epoch": 0.9891402714932127,
45912
+ "grad_norm": 1.8766851425170898,
45913
+ "learning_rate": 2.999403947968049e-08,
45914
+ "loss": 1.0967,
45915
+ "step": 6558
45916
+ },
45917
+ {
45918
+ "epoch": 0.989291101055807,
45919
+ "grad_norm": 1.867449164390564,
45920
+ "learning_rate": 2.9166738053221232e-08,
45921
+ "loss": 0.9528,
45922
+ "step": 6559
45923
+ },
45924
+ {
45925
+ "epoch": 0.9894419306184012,
45926
+ "grad_norm": 2.329340696334839,
45927
+ "learning_rate": 2.8351002807269767e-08,
45928
+ "loss": 1.5427,
45929
+ "step": 6560
45930
+ },
45931
+ {
45932
+ "epoch": 0.9895927601809955,
45933
+ "grad_norm": 1.7027629613876343,
45934
+ "learning_rate": 2.7546833930636173e-08,
45935
+ "loss": 0.8837,
45936
+ "step": 6561
45937
+ },
45938
+ {
45939
+ "epoch": 0.9897435897435898,
45940
+ "grad_norm": 1.9053348302841187,
45941
+ "learning_rate": 2.6754231609449344e-08,
45942
+ "loss": 1.0772,
45943
+ "step": 6562
45944
+ },
45945
+ {
45946
+ "epoch": 0.989894419306184,
45947
+ "grad_norm": 1.7392765283584595,
45948
+ "learning_rate": 2.5973196027162527e-08,
45949
+ "loss": 0.8626,
45950
+ "step": 6563
45951
+ },
45952
+ {
45953
+ "epoch": 0.9900452488687783,
45954
+ "grad_norm": 1.7653361558914185,
45955
+ "learning_rate": 2.5203727364558892e-08,
45956
+ "loss": 1.1656,
45957
+ "step": 6564
45958
+ },
45959
+ {
45960
+ "epoch": 0.9901960784313726,
45961
+ "grad_norm": 2.0128557682037354,
45962
+ "learning_rate": 2.4445825799729317e-08,
45963
+ "loss": 1.1097,
45964
+ "step": 6565
45965
+ },
45966
+ {
45967
+ "epoch": 0.9903469079939669,
45968
+ "grad_norm": 1.5651352405548096,
45969
+ "learning_rate": 2.3699491508105687e-08,
45970
+ "loss": 0.7791,
45971
+ "step": 6566
45972
+ },
45973
+ {
45974
+ "epoch": 0.9904977375565611,
45975
+ "grad_norm": 1.7918542623519897,
45976
+ "learning_rate": 2.2964724662433156e-08,
45977
+ "loss": 1.0578,
45978
+ "step": 6567
45979
+ },
45980
+ {
45981
+ "epoch": 0.9906485671191554,
45982
+ "grad_norm": 2.0644359588623047,
45983
+ "learning_rate": 2.224152543277569e-08,
45984
+ "loss": 1.1738,
45985
+ "step": 6568
45986
+ },
45987
+ {
45988
+ "epoch": 0.9907993966817497,
45989
+ "grad_norm": 1.8133044242858887,
45990
+ "learning_rate": 2.152989398652161e-08,
45991
+ "loss": 1.0495,
45992
+ "step": 6569
45993
+ },
45994
+ {
45995
+ "epoch": 0.9909502262443439,
45996
+ "grad_norm": 1.6463404893875122,
45997
+ "learning_rate": 2.0829830488389156e-08,
45998
+ "loss": 0.726,
45999
+ "step": 6570
46000
+ },
46001
+ {
46002
+ "epoch": 0.9911010558069382,
46003
+ "grad_norm": 1.8328197002410889,
46004
+ "learning_rate": 2.014133510041538e-08,
46005
+ "loss": 0.976,
46006
+ "step": 6571
46007
+ },
46008
+ {
46009
+ "epoch": 0.9912518853695325,
46010
+ "grad_norm": 1.6490834951400757,
46011
+ "learning_rate": 1.9464407981956146e-08,
46012
+ "loss": 0.8504,
46013
+ "step": 6572
46014
+ },
46015
+ {
46016
+ "epoch": 0.9914027149321267,
46017
+ "grad_norm": 1.933363914489746,
46018
+ "learning_rate": 1.879904928969167e-08,
46019
+ "loss": 0.9774,
46020
+ "step": 6573
46021
+ },
46022
+ {
46023
+ "epoch": 0.991553544494721,
46024
+ "grad_norm": 1.8430428504943848,
46025
+ "learning_rate": 1.8145259177621e-08,
46026
+ "loss": 0.8791,
46027
+ "step": 6574
46028
+ },
46029
+ {
46030
+ "epoch": 0.9917043740573153,
46031
+ "grad_norm": 2.158477544784546,
46032
+ "learning_rate": 1.7503037797078626e-08,
46033
+ "loss": 1.3405,
46034
+ "step": 6575
46035
+ },
46036
+ {
46037
+ "epoch": 0.9918552036199095,
46038
+ "grad_norm": 2.0298755168914795,
46039
+ "learning_rate": 1.687238529670121e-08,
46040
+ "loss": 1.2048,
46041
+ "step": 6576
46042
+ },
46043
+ {
46044
+ "epoch": 0.9920060331825038,
46045
+ "grad_norm": 2.078059196472168,
46046
+ "learning_rate": 1.6253301822466428e-08,
46047
+ "loss": 1.1711,
46048
+ "step": 6577
46049
+ },
46050
+ {
46051
+ "epoch": 0.9921568627450981,
46052
+ "grad_norm": 1.9220235347747803,
46053
+ "learning_rate": 1.5645787517670762e-08,
46054
+ "loss": 0.8961,
46055
+ "step": 6578
46056
+ },
46057
+ {
46058
+ "epoch": 0.9923076923076923,
46059
+ "grad_norm": 2.269637107849121,
46060
+ "learning_rate": 1.5049842522918412e-08,
46061
+ "loss": 1.4525,
46062
+ "step": 6579
46063
+ },
46064
+ {
46065
+ "epoch": 0.9924585218702866,
46066
+ "grad_norm": 2.0240731239318848,
46067
+ "learning_rate": 1.4465466976149034e-08,
46068
+ "loss": 1.2533,
46069
+ "step": 6580
46070
+ },
46071
+ {
46072
+ "epoch": 0.9926093514328809,
46073
+ "grad_norm": 2.1804757118225098,
46074
+ "learning_rate": 1.38926610126211e-08,
46075
+ "loss": 1.1277,
46076
+ "step": 6581
46077
+ },
46078
+ {
46079
+ "epoch": 0.9927601809954751,
46080
+ "grad_norm": 2.2509958744049072,
46081
+ "learning_rate": 1.3331424764922994e-08,
46082
+ "loss": 1.1611,
46083
+ "step": 6582
46084
+ },
46085
+ {
46086
+ "epoch": 0.9929110105580694,
46087
+ "grad_norm": 1.9428391456604004,
46088
+ "learning_rate": 1.2781758362945262e-08,
46089
+ "loss": 1.0481,
46090
+ "step": 6583
46091
+ },
46092
+ {
46093
+ "epoch": 0.9930618401206637,
46094
+ "grad_norm": 1.855837106704712,
46095
+ "learning_rate": 1.224366193392501e-08,
46096
+ "loss": 0.8939,
46097
+ "step": 6584
46098
+ },
46099
+ {
46100
+ "epoch": 0.9932126696832579,
46101
+ "grad_norm": 1.8608096837997437,
46102
+ "learning_rate": 1.1717135602401507e-08,
46103
+ "loss": 0.9874,
46104
+ "step": 6585
46105
+ },
46106
+ {
46107
+ "epoch": 0.9933634992458522,
46108
+ "grad_norm": 1.6831547021865845,
46109
+ "learning_rate": 1.1202179490243937e-08,
46110
+ "loss": 0.9085,
46111
+ "step": 6586
46112
+ },
46113
+ {
46114
+ "epoch": 0.9935143288084465,
46115
+ "grad_norm": 2.1930525302886963,
46116
+ "learning_rate": 1.069879371664584e-08,
46117
+ "loss": 1.0266,
46118
+ "step": 6587
46119
+ },
46120
+ {
46121
+ "epoch": 0.9936651583710407,
46122
+ "grad_norm": 1.843945026397705,
46123
+ "learning_rate": 1.0206978398119572e-08,
46124
+ "loss": 1.0207,
46125
+ "step": 6588
46126
+ },
46127
+ {
46128
+ "epoch": 0.993815987933635,
46129
+ "grad_norm": 1.9906690120697021,
46130
+ "learning_rate": 9.72673364850185e-09,
46131
+ "loss": 1.1523,
46132
+ "step": 6589
46133
+ },
46134
+ {
46135
+ "epoch": 0.9939668174962293,
46136
+ "grad_norm": 2.588918685913086,
46137
+ "learning_rate": 9.258059578948209e-09,
46138
+ "loss": 1.4171,
46139
+ "step": 6590
46140
+ },
46141
+ {
46142
+ "epoch": 0.9941176470588236,
46143
+ "grad_norm": 2.232747793197632,
46144
+ "learning_rate": 8.800956297932983e-09,
46145
+ "loss": 1.1359,
46146
+ "step": 6591
46147
+ },
46148
+ {
46149
+ "epoch": 0.9942684766214178,
46150
+ "grad_norm": 1.9260307550430298,
46151
+ "learning_rate": 8.35542391126598e-09,
46152
+ "loss": 0.8841,
46153
+ "step": 6592
46154
+ },
46155
+ {
46156
+ "epoch": 0.9944193061840121,
46157
+ "grad_norm": 2.2145347595214844,
46158
+ "learning_rate": 7.921462522059164e-09,
46159
+ "loss": 1.0699,
46160
+ "step": 6593
46161
+ },
46162
+ {
46163
+ "epoch": 0.9945701357466064,
46164
+ "grad_norm": 2.3072757720947266,
46165
+ "learning_rate": 7.499072230765514e-09,
46166
+ "loss": 1.0223,
46167
+ "step": 6594
46168
+ },
46169
+ {
46170
+ "epoch": 0.9947209653092006,
46171
+ "grad_norm": 1.863654613494873,
46172
+ "learning_rate": 7.088253135145717e-09,
46173
+ "loss": 0.9445,
46174
+ "step": 6595
46175
+ },
46176
+ {
46177
+ "epoch": 0.9948717948717949,
46178
+ "grad_norm": 2.0308523178100586,
46179
+ "learning_rate": 6.6890053302848255e-09,
46180
+ "loss": 1.0649,
46181
+ "step": 6596
46182
+ },
46183
+ {
46184
+ "epoch": 0.9950226244343892,
46185
+ "grad_norm": 1.6063568592071533,
46186
+ "learning_rate": 6.301328908597803e-09,
46187
+ "loss": 0.7339,
46188
+ "step": 6597
46189
+ },
46190
+ {
46191
+ "epoch": 0.9951734539969834,
46192
+ "grad_norm": 1.5589513778686523,
46193
+ "learning_rate": 5.925223959818427e-09,
46194
+ "loss": 0.5174,
46195
+ "step": 6598
46196
+ },
46197
+ {
46198
+ "epoch": 0.9953242835595777,
46199
+ "grad_norm": 1.47749924659729,
46200
+ "learning_rate": 5.560690570988181e-09,
46201
+ "loss": 0.6071,
46202
+ "step": 6599
46203
+ },
46204
+ {
46205
+ "epoch": 0.995475113122172,
46206
+ "grad_norm": 2.1695616245269775,
46207
+ "learning_rate": 5.207728826495118e-09,
46208
+ "loss": 1.0439,
46209
+ "step": 6600
46210
+ },
46211
+ {
46212
+ "epoch": 0.9956259426847662,
46213
+ "grad_norm": 1.7432292699813843,
46214
+ "learning_rate": 4.866338808023896e-09,
46215
+ "loss": 1.0061,
46216
+ "step": 6601
46217
+ },
46218
+ {
46219
+ "epoch": 0.9957767722473605,
46220
+ "grad_norm": 1.63108491897583,
46221
+ "learning_rate": 4.53652059459464e-09,
46222
+ "loss": 0.9774,
46223
+ "step": 6602
46224
+ },
46225
+ {
46226
+ "epoch": 0.9959276018099548,
46227
+ "grad_norm": 1.8450249433517456,
46228
+ "learning_rate": 4.218274262551835e-09,
46229
+ "loss": 1.0761,
46230
+ "step": 6603
46231
+ },
46232
+ {
46233
+ "epoch": 0.996078431372549,
46234
+ "grad_norm": 1.7824820280075073,
46235
+ "learning_rate": 3.91159988555323e-09,
46236
+ "loss": 1.0144,
46237
+ "step": 6604
46238
+ },
46239
+ {
46240
+ "epoch": 0.9962292609351433,
46241
+ "grad_norm": 1.6914958953857422,
46242
+ "learning_rate": 3.6164975345809316e-09,
46243
+ "loss": 0.8875,
46244
+ "step": 6605
46245
+ },
46246
+ {
46247
+ "epoch": 0.9963800904977376,
46248
+ "grad_norm": 1.807647943496704,
46249
+ "learning_rate": 3.3329672779414124e-09,
46250
+ "loss": 0.9614,
46251
+ "step": 6606
46252
+ },
46253
+ {
46254
+ "epoch": 0.9965309200603318,
46255
+ "grad_norm": 1.8976490497589111,
46256
+ "learning_rate": 3.061009181254404e-09,
46257
+ "loss": 1.1459,
46258
+ "step": 6607
46259
+ },
46260
+ {
46261
+ "epoch": 0.9966817496229261,
46262
+ "grad_norm": 1.571880578994751,
46263
+ "learning_rate": 2.800623307469552e-09,
46264
+ "loss": 0.7526,
46265
+ "step": 6608
46266
+ },
46267
+ {
46268
+ "epoch": 0.9968325791855204,
46269
+ "grad_norm": 2.0932419300079346,
46270
+ "learning_rate": 2.5518097168608646e-09,
46271
+ "loss": 1.3696,
46272
+ "step": 6609
46273
+ },
46274
+ {
46275
+ "epoch": 0.9969834087481146,
46276
+ "grad_norm": 1.9494574069976807,
46277
+ "learning_rate": 2.3145684670100587e-09,
46278
+ "loss": 1.2108,
46279
+ "step": 6610
46280
+ },
46281
+ {
46282
+ "epoch": 0.9971342383107089,
46283
+ "grad_norm": 1.837791919708252,
46284
+ "learning_rate": 2.0888996128343164e-09,
46285
+ "loss": 1.0885,
46286
+ "step": 6611
46287
+ },
46288
+ {
46289
+ "epoch": 0.9972850678733032,
46290
+ "grad_norm": 1.9261425733566284,
46291
+ "learning_rate": 1.8748032065640797e-09,
46292
+ "loss": 1.0092,
46293
+ "step": 6612
46294
+ },
46295
+ {
46296
+ "epoch": 0.9974358974358974,
46297
+ "grad_norm": 1.8976205587387085,
46298
+ "learning_rate": 1.6722792977541535e-09,
46299
+ "loss": 0.9953,
46300
+ "step": 6613
46301
+ },
46302
+ {
46303
+ "epoch": 0.9975867269984917,
46304
+ "grad_norm": 2.369472026824951,
46305
+ "learning_rate": 1.4813279332781538e-09,
46306
+ "loss": 1.2183,
46307
+ "step": 6614
46308
+ },
46309
+ {
46310
+ "epoch": 0.997737556561086,
46311
+ "grad_norm": 2.169970750808716,
46312
+ "learning_rate": 1.3019491573396104e-09,
46313
+ "loss": 1.3011,
46314
+ "step": 6615
46315
+ },
46316
+ {
46317
+ "epoch": 0.9978883861236802,
46318
+ "grad_norm": 1.7789149284362793,
46319
+ "learning_rate": 1.1341430114553132e-09,
46320
+ "loss": 0.8241,
46321
+ "step": 6616
46322
+ },
46323
+ {
46324
+ "epoch": 0.9980392156862745,
46325
+ "grad_norm": 1.9637186527252197,
46326
+ "learning_rate": 9.779095344608636e-10,
46327
+ "loss": 0.9467,
46328
+ "step": 6617
46329
+ },
46330
+ {
46331
+ "epoch": 0.9981900452488688,
46332
+ "grad_norm": 1.9668041467666626,
46333
+ "learning_rate": 8.332487625217767e-10,
46334
+ "loss": 1.0528,
46335
+ "step": 6618
46336
+ },
46337
+ {
46338
+ "epoch": 0.998340874811463,
46339
+ "grad_norm": 1.6691468954086304,
46340
+ "learning_rate": 7.001607291168278e-10,
46341
+ "loss": 0.8094,
46342
+ "step": 6619
46343
+ },
46344
+ {
46345
+ "epoch": 0.9984917043740573,
46346
+ "grad_norm": 2.0753369331359863,
46347
+ "learning_rate": 5.786454650602568e-10,
46348
+ "loss": 1.1893,
46349
+ "step": 6620
46350
+ },
46351
+ {
46352
+ "epoch": 0.9986425339366516,
46353
+ "grad_norm": 2.0562314987182617,
46354
+ "learning_rate": 4.68702998462911e-10,
46355
+ "loss": 1.2021,
46356
+ "step": 6621
46357
+ },
46358
+ {
46359
+ "epoch": 0.9987933634992459,
46360
+ "grad_norm": 2.3388001918792725,
46361
+ "learning_rate": 3.7033335478775523e-10,
46362
+ "loss": 1.2424,
46363
+ "step": 6622
46364
+ },
46365
+ {
46366
+ "epoch": 0.9989441930618401,
46367
+ "grad_norm": 1.9954073429107666,
46368
+ "learning_rate": 2.8353655679436154e-10,
46369
+ "loss": 0.9312,
46370
+ "step": 6623
46371
+ },
46372
+ {
46373
+ "epoch": 0.9990950226244344,
46374
+ "grad_norm": 2.308021068572998,
46375
+ "learning_rate": 2.0831262457221557e-10,
46376
+ "loss": 1.4345,
46377
+ "step": 6624
46378
+ },
46379
+ {
46380
+ "epoch": 0.9992458521870287,
46381
+ "grad_norm": 1.8140825033187866,
46382
+ "learning_rate": 1.4466157553516547e-10,
46383
+ "loss": 0.8634,
46384
+ "step": 6625
46385
+ },
46386
+ {
46387
+ "epoch": 0.9993966817496229,
46388
+ "grad_norm": 1.8196262121200562,
46389
+ "learning_rate": 9.258342441587076e-11,
46390
+ "loss": 0.8215,
46391
+ "step": 6626
46392
+ },
46393
+ {
46394
+ "epoch": 0.9995475113122172,
46395
+ "grad_norm": 1.757359504699707,
46396
+ "learning_rate": 5.207818326580238e-11,
46397
+ "loss": 0.8753,
46398
+ "step": 6627
46399
+ },
46400
+ {
46401
+ "epoch": 0.9996983408748115,
46402
+ "grad_norm": 2.1403098106384277,
46403
+ "learning_rate": 2.3145861460793782e-11,
46404
+ "loss": 1.122,
46405
+ "step": 6628
46406
+ },
46407
+ {
46408
+ "epoch": 0.9998491704374057,
46409
+ "grad_norm": 1.536190390586853,
46410
+ "learning_rate": 5.78646570104091e-12,
46411
+ "loss": 0.7415,
46412
+ "step": 6629
46413
+ },
46414
+ {
46415
+ "epoch": 1.0,
46416
+ "grad_norm": 2.804316520690918,
46417
+ "learning_rate": 0.0,
46418
+ "loss": 1.0438,
46419
+ "step": 6630
   }
 ],
 "logging_steps": 1,
@@ -45192,12 +46431,12 @@
 "should_evaluate": false,
 "should_log": false,
 "should_save": true,
-"should_training_stop": false
+"should_training_stop": true
 },
 "attributes": {}
 }
 },
-"total_flos": 7.172916759861658e+18,
+"total_flos": 7.369274315715379e+18,
 "train_batch_size": 4,
 "trial_name": null,
 "trial_params": null