Nadav commited on
Commit
6eaaa6f
·
1 Parent(s): 1990806

Training in progress, step 100000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7b8731a9c673247298e68f979a882fa4b9de9169b10db2da5d12376e0718916d
3
  size 893439185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37f683478bb8b21e00c0c74eb9d446bcf7b5cb5d8713b232385c7f1a4c9244d9
3
  size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5ba5c39390d379753ab42fe112a3e238b37bd25f743670804092f31c48fc72aa
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ece555ebd5e5c6fe697b5c0892a0069538c02673bcdea4dd5ba7a2fbb86221c
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef8ef3ecf4eb2a0ae5fdf4ba1f3938005adc2342e5e38caa973575a3b27c37b8
3
  size 19603
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1563cbc1ed661f4d7cd48c7ebfe5d97447f92fa708e98efa17ebd50f337cd1f3
3
  size 19603
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20e6cdd3a9c57310ccd917846a32eab047897fb724a236b5fa11cdca42eb70d1
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f28e7dd5089fc108050723c00a6956391616d298b19b067cb1fe799c89cf3b3c
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc8ca83639afb4d839ab1a4377e720e1b01c8d2a67d6f20cb38b162d3d7af2df
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc741bd201acfbf14b76c991f1da2695d0be2064cfb57a0af565660dec5446de
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 18.079307998833706,
5
- "global_step": 98000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -118390,11 +118390,2427 @@
118390
  "eval_samples_per_second": 63.888,
118391
  "eval_steps_per_second": 0.677,
118392
  "step": 98000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118393
  }
118394
  ],
118395
  "max_steps": 100000,
118396
  "num_train_epochs": 20,
118397
- "total_flos": 1.030533490112957e+22,
118398
  "trial_name": null,
118399
  "trial_params": null
118400
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 18.46807269899893,
5
+ "global_step": 100000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
118390
  "eval_samples_per_second": 63.888,
118391
  "eval_steps_per_second": 0.677,
118392
  "step": 98000
118393
+ },
118394
+ {
118395
+ "epoch": 18.08,
118396
+ "learning_rate": 1.0218966728802669e-05,
118397
+ "loss": 0.4018,
118398
+ "step": 98005
118399
+ },
118400
+ {
118401
+ "epoch": 18.08,
118402
+ "learning_rate": 1.021789082613816e-05,
118403
+ "loss": 0.4046,
118404
+ "step": 98010
118405
+ },
118406
+ {
118407
+ "epoch": 18.08,
118408
+ "learning_rate": 1.0216817570553575e-05,
118409
+ "loss": 0.4138,
118410
+ "step": 98015
118411
+ },
118412
+ {
118413
+ "epoch": 18.08,
118414
+ "learning_rate": 1.0215746962075409e-05,
118415
+ "loss": 0.4123,
118416
+ "step": 98020
118417
+ },
118418
+ {
118419
+ "epoch": 18.08,
118420
+ "learning_rate": 1.0214679000730085e-05,
118421
+ "loss": 0.4035,
118422
+ "step": 98025
118423
+ },
118424
+ {
118425
+ "epoch": 18.09,
118426
+ "learning_rate": 1.0213613686543943e-05,
118427
+ "loss": 0.4105,
118428
+ "step": 98030
118429
+ },
118430
+ {
118431
+ "epoch": 18.09,
118432
+ "learning_rate": 1.0212551019543276e-05,
118433
+ "loss": 0.4044,
118434
+ "step": 98035
118435
+ },
118436
+ {
118437
+ "epoch": 18.09,
118438
+ "learning_rate": 1.0211490999754295e-05,
118439
+ "loss": 0.411,
118440
+ "step": 98040
118441
+ },
118442
+ {
118443
+ "epoch": 18.09,
118444
+ "learning_rate": 1.0210433627203157e-05,
118445
+ "loss": 0.4045,
118446
+ "step": 98045
118447
+ },
118448
+ {
118449
+ "epoch": 18.09,
118450
+ "learning_rate": 1.0209378901915956e-05,
118451
+ "loss": 0.4116,
118452
+ "step": 98050
118453
+ },
118454
+ {
118455
+ "epoch": 18.09,
118456
+ "learning_rate": 1.020832682391872e-05,
118457
+ "loss": 0.4044,
118458
+ "step": 98055
118459
+ },
118460
+ {
118461
+ "epoch": 18.09,
118462
+ "learning_rate": 1.0207277393237397e-05,
118463
+ "loss": 0.4102,
118464
+ "step": 98060
118465
+ },
118466
+ {
118467
+ "epoch": 18.09,
118468
+ "learning_rate": 1.0206230609897896e-05,
118469
+ "loss": 0.4097,
118470
+ "step": 98065
118471
+ },
118472
+ {
118473
+ "epoch": 18.09,
118474
+ "learning_rate": 1.020518647392603e-05,
118475
+ "loss": 0.4076,
118476
+ "step": 98070
118477
+ },
118478
+ {
118479
+ "epoch": 18.09,
118480
+ "learning_rate": 1.0204144985347559e-05,
118481
+ "loss": 0.4071,
118482
+ "step": 98075
118483
+ },
118484
+ {
118485
+ "epoch": 18.09,
118486
+ "learning_rate": 1.0203106144188201e-05,
118487
+ "loss": 0.4087,
118488
+ "step": 98080
118489
+ },
118490
+ {
118491
+ "epoch": 18.1,
118492
+ "learning_rate": 1.020206995047358e-05,
118493
+ "loss": 0.4125,
118494
+ "step": 98085
118495
+ },
118496
+ {
118497
+ "epoch": 18.1,
118498
+ "learning_rate": 1.020103640422925e-05,
118499
+ "loss": 0.4144,
118500
+ "step": 98090
118501
+ },
118502
+ {
118503
+ "epoch": 18.1,
118504
+ "learning_rate": 1.0200005505480736e-05,
118505
+ "loss": 0.4106,
118506
+ "step": 98095
118507
+ },
118508
+ {
118509
+ "epoch": 18.1,
118510
+ "learning_rate": 1.0198977254253452e-05,
118511
+ "loss": 0.4036,
118512
+ "step": 98100
118513
+ },
118514
+ {
118515
+ "epoch": 18.1,
118516
+ "learning_rate": 1.0197951650572778e-05,
118517
+ "loss": 0.4086,
118518
+ "step": 98105
118519
+ },
118520
+ {
118521
+ "epoch": 18.1,
118522
+ "learning_rate": 1.0196928694464023e-05,
118523
+ "loss": 0.4063,
118524
+ "step": 98110
118525
+ },
118526
+ {
118527
+ "epoch": 18.1,
118528
+ "learning_rate": 1.0195908385952423e-05,
118529
+ "loss": 0.4091,
118530
+ "step": 98115
118531
+ },
118532
+ {
118533
+ "epoch": 18.1,
118534
+ "learning_rate": 1.0194890725063153e-05,
118535
+ "loss": 0.41,
118536
+ "step": 98120
118537
+ },
118538
+ {
118539
+ "epoch": 18.1,
118540
+ "learning_rate": 1.0193875711821333e-05,
118541
+ "loss": 0.409,
118542
+ "step": 98125
118543
+ },
118544
+ {
118545
+ "epoch": 18.1,
118546
+ "learning_rate": 1.0192863346251992e-05,
118547
+ "loss": 0.4071,
118548
+ "step": 98130
118549
+ },
118550
+ {
118551
+ "epoch": 18.11,
118552
+ "learning_rate": 1.019185362838011e-05,
118553
+ "loss": 0.4115,
118554
+ "step": 98135
118555
+ },
118556
+ {
118557
+ "epoch": 18.11,
118558
+ "learning_rate": 1.019084655823062e-05,
118559
+ "loss": 0.4109,
118560
+ "step": 98140
118561
+ },
118562
+ {
118563
+ "epoch": 18.11,
118564
+ "learning_rate": 1.0189842135828343e-05,
118565
+ "loss": 0.4073,
118566
+ "step": 98145
118567
+ },
118568
+ {
118569
+ "epoch": 18.11,
118570
+ "learning_rate": 1.0188840361198094e-05,
118571
+ "loss": 0.4062,
118572
+ "step": 98150
118573
+ },
118574
+ {
118575
+ "epoch": 18.11,
118576
+ "learning_rate": 1.0187841234364555e-05,
118577
+ "loss": 0.4083,
118578
+ "step": 98155
118579
+ },
118580
+ {
118581
+ "epoch": 18.11,
118582
+ "learning_rate": 1.0186844755352406e-05,
118583
+ "loss": 0.4064,
118584
+ "step": 98160
118585
+ },
118586
+ {
118587
+ "epoch": 18.11,
118588
+ "learning_rate": 1.018585092418622e-05,
118589
+ "loss": 0.4069,
118590
+ "step": 98165
118591
+ },
118592
+ {
118593
+ "epoch": 18.11,
118594
+ "learning_rate": 1.0184859740890526e-05,
118595
+ "loss": 0.4071,
118596
+ "step": 98170
118597
+ },
118598
+ {
118599
+ "epoch": 18.11,
118600
+ "learning_rate": 1.018387120548979e-05,
118601
+ "loss": 0.4037,
118602
+ "step": 98175
118603
+ },
118604
+ {
118605
+ "epoch": 18.11,
118606
+ "learning_rate": 1.018288531800838e-05,
118607
+ "loss": 0.4053,
118608
+ "step": 98180
118609
+ },
118610
+ {
118611
+ "epoch": 18.12,
118612
+ "learning_rate": 1.0181902078470635e-05,
118613
+ "loss": 0.4081,
118614
+ "step": 98185
118615
+ },
118616
+ {
118617
+ "epoch": 18.12,
118618
+ "learning_rate": 1.0180921486900806e-05,
118619
+ "loss": 0.4062,
118620
+ "step": 98190
118621
+ },
118622
+ {
118623
+ "epoch": 18.12,
118624
+ "learning_rate": 1.0179943543323107e-05,
118625
+ "loss": 0.409,
118626
+ "step": 98195
118627
+ },
118628
+ {
118629
+ "epoch": 18.12,
118630
+ "learning_rate": 1.0178968247761648e-05,
118631
+ "loss": 0.4101,
118632
+ "step": 98200
118633
+ },
118634
+ {
118635
+ "epoch": 18.12,
118636
+ "learning_rate": 1.01779956002405e-05,
118637
+ "loss": 0.4067,
118638
+ "step": 98205
118639
+ },
118640
+ {
118641
+ "epoch": 18.12,
118642
+ "learning_rate": 1.0177025600783679e-05,
118643
+ "loss": 0.4126,
118644
+ "step": 98210
118645
+ },
118646
+ {
118647
+ "epoch": 18.12,
118648
+ "learning_rate": 1.0176058249415088e-05,
118649
+ "loss": 0.4064,
118650
+ "step": 98215
118651
+ },
118652
+ {
118653
+ "epoch": 18.12,
118654
+ "learning_rate": 1.017509354615861e-05,
118655
+ "loss": 0.408,
118656
+ "step": 98220
118657
+ },
118658
+ {
118659
+ "epoch": 18.12,
118660
+ "learning_rate": 1.0174131491038067e-05,
118661
+ "loss": 0.4072,
118662
+ "step": 98225
118663
+ },
118664
+ {
118665
+ "epoch": 18.12,
118666
+ "learning_rate": 1.017317208407716e-05,
118667
+ "loss": 0.4067,
118668
+ "step": 98230
118669
+ },
118670
+ {
118671
+ "epoch": 18.12,
118672
+ "learning_rate": 1.0172215325299595e-05,
118673
+ "loss": 0.4086,
118674
+ "step": 98235
118675
+ },
118676
+ {
118677
+ "epoch": 18.13,
118678
+ "learning_rate": 1.0171261214728953e-05,
118679
+ "loss": 0.4045,
118680
+ "step": 98240
118681
+ },
118682
+ {
118683
+ "epoch": 18.13,
118684
+ "learning_rate": 1.0170309752388794e-05,
118685
+ "loss": 0.4113,
118686
+ "step": 98245
118687
+ },
118688
+ {
118689
+ "epoch": 18.13,
118690
+ "learning_rate": 1.0169360938302582e-05,
118691
+ "loss": 0.4164,
118692
+ "step": 98250
118693
+ },
118694
+ {
118695
+ "epoch": 18.13,
118696
+ "learning_rate": 1.0168414772493744e-05,
118697
+ "loss": 0.4084,
118698
+ "step": 98255
118699
+ },
118700
+ {
118701
+ "epoch": 18.13,
118702
+ "learning_rate": 1.0167471254985602e-05,
118703
+ "loss": 0.4106,
118704
+ "step": 98260
118705
+ },
118706
+ {
118707
+ "epoch": 18.13,
118708
+ "learning_rate": 1.0166530385801455e-05,
118709
+ "loss": 0.4108,
118710
+ "step": 98265
118711
+ },
118712
+ {
118713
+ "epoch": 18.13,
118714
+ "learning_rate": 1.0165592164964525e-05,
118715
+ "loss": 0.4145,
118716
+ "step": 98270
118717
+ },
118718
+ {
118719
+ "epoch": 18.13,
118720
+ "learning_rate": 1.0164656592497931e-05,
118721
+ "loss": 0.4124,
118722
+ "step": 98275
118723
+ },
118724
+ {
118725
+ "epoch": 18.13,
118726
+ "learning_rate": 1.016372366842479e-05,
118727
+ "loss": 0.4069,
118728
+ "step": 98280
118729
+ },
118730
+ {
118731
+ "epoch": 18.13,
118732
+ "learning_rate": 1.0162793392768091e-05,
118733
+ "loss": 0.4078,
118734
+ "step": 98285
118735
+ },
118736
+ {
118737
+ "epoch": 18.14,
118738
+ "learning_rate": 1.0161865765550818e-05,
118739
+ "loss": 0.4144,
118740
+ "step": 98290
118741
+ },
118742
+ {
118743
+ "epoch": 18.14,
118744
+ "learning_rate": 1.016094078679584e-05,
118745
+ "loss": 0.4066,
118746
+ "step": 98295
118747
+ },
118748
+ {
118749
+ "epoch": 18.14,
118750
+ "learning_rate": 1.0160018456525988e-05,
118751
+ "loss": 0.4154,
118752
+ "step": 98300
118753
+ },
118754
+ {
118755
+ "epoch": 18.14,
118756
+ "learning_rate": 1.015909877476401e-05,
118757
+ "loss": 0.4052,
118758
+ "step": 98305
118759
+ },
118760
+ {
118761
+ "epoch": 18.14,
118762
+ "learning_rate": 1.0158181741532607e-05,
118763
+ "loss": 0.4074,
118764
+ "step": 98310
118765
+ },
118766
+ {
118767
+ "epoch": 18.14,
118768
+ "learning_rate": 1.0157267356854405e-05,
118769
+ "loss": 0.4119,
118770
+ "step": 98315
118771
+ },
118772
+ {
118773
+ "epoch": 18.14,
118774
+ "learning_rate": 1.0156355620751963e-05,
118775
+ "loss": 0.4069,
118776
+ "step": 98320
118777
+ },
118778
+ {
118779
+ "epoch": 18.14,
118780
+ "learning_rate": 1.0155446533247778e-05,
118781
+ "loss": 0.4046,
118782
+ "step": 98325
118783
+ },
118784
+ {
118785
+ "epoch": 18.14,
118786
+ "learning_rate": 1.0154540094364277e-05,
118787
+ "loss": 0.4049,
118788
+ "step": 98330
118789
+ },
118790
+ {
118791
+ "epoch": 18.14,
118792
+ "learning_rate": 1.0153636304123833e-05,
118793
+ "loss": 0.4092,
118794
+ "step": 98335
118795
+ },
118796
+ {
118797
+ "epoch": 18.15,
118798
+ "learning_rate": 1.0152735162548734e-05,
118799
+ "loss": 0.405,
118800
+ "step": 98340
118801
+ },
118802
+ {
118803
+ "epoch": 18.15,
118804
+ "learning_rate": 1.0151836669661236e-05,
118805
+ "loss": 0.4064,
118806
+ "step": 98345
118807
+ },
118808
+ {
118809
+ "epoch": 18.15,
118810
+ "learning_rate": 1.0150940825483491e-05,
118811
+ "loss": 0.4118,
118812
+ "step": 98350
118813
+ },
118814
+ {
118815
+ "epoch": 18.15,
118816
+ "learning_rate": 1.0150047630037602e-05,
118817
+ "loss": 0.4049,
118818
+ "step": 98355
118819
+ },
118820
+ {
118821
+ "epoch": 18.15,
118822
+ "learning_rate": 1.0149157083345625e-05,
118823
+ "loss": 0.4039,
118824
+ "step": 98360
118825
+ },
118826
+ {
118827
+ "epoch": 18.15,
118828
+ "learning_rate": 1.0148269185429508e-05,
118829
+ "loss": 0.4085,
118830
+ "step": 98365
118831
+ },
118832
+ {
118833
+ "epoch": 18.15,
118834
+ "learning_rate": 1.0147383936311186e-05,
118835
+ "loss": 0.4053,
118836
+ "step": 98370
118837
+ },
118838
+ {
118839
+ "epoch": 18.15,
118840
+ "learning_rate": 1.014650133601248e-05,
118841
+ "loss": 0.4029,
118842
+ "step": 98375
118843
+ },
118844
+ {
118845
+ "epoch": 18.15,
118846
+ "learning_rate": 1.0145621384555178e-05,
118847
+ "loss": 0.4047,
118848
+ "step": 98380
118849
+ },
118850
+ {
118851
+ "epoch": 18.15,
118852
+ "learning_rate": 1.0144744081960993e-05,
118853
+ "loss": 0.41,
118854
+ "step": 98385
118855
+ },
118856
+ {
118857
+ "epoch": 18.16,
118858
+ "learning_rate": 1.0143869428251562e-05,
118859
+ "loss": 0.4048,
118860
+ "step": 98390
118861
+ },
118862
+ {
118863
+ "epoch": 18.16,
118864
+ "learning_rate": 1.0142997423448477e-05,
118865
+ "loss": 0.4069,
118866
+ "step": 98395
118867
+ },
118868
+ {
118869
+ "epoch": 18.16,
118870
+ "learning_rate": 1.0142128067573252e-05,
118871
+ "loss": 0.4037,
118872
+ "step": 98400
118873
+ },
118874
+ {
118875
+ "epoch": 18.16,
118876
+ "learning_rate": 1.0141261360647338e-05,
118877
+ "loss": 0.4071,
118878
+ "step": 98405
118879
+ },
118880
+ {
118881
+ "epoch": 18.16,
118882
+ "learning_rate": 1.014039730269211e-05,
118883
+ "loss": 0.4031,
118884
+ "step": 98410
118885
+ },
118886
+ {
118887
+ "epoch": 18.16,
118888
+ "learning_rate": 1.0139535893728894e-05,
118889
+ "loss": 0.4099,
118890
+ "step": 98415
118891
+ },
118892
+ {
118893
+ "epoch": 18.16,
118894
+ "learning_rate": 1.0138677133778948e-05,
118895
+ "loss": 0.4134,
118896
+ "step": 98420
118897
+ },
118898
+ {
118899
+ "epoch": 18.16,
118900
+ "learning_rate": 1.0137821022863454e-05,
118901
+ "loss": 0.4097,
118902
+ "step": 98425
118903
+ },
118904
+ {
118905
+ "epoch": 18.16,
118906
+ "learning_rate": 1.0136967561003539e-05,
118907
+ "loss": 0.4054,
118908
+ "step": 98430
118909
+ },
118910
+ {
118911
+ "epoch": 18.16,
118912
+ "learning_rate": 1.0136116748220269e-05,
118913
+ "loss": 0.4041,
118914
+ "step": 98435
118915
+ },
118916
+ {
118917
+ "epoch": 18.16,
118918
+ "learning_rate": 1.0135268584534622e-05,
118919
+ "loss": 0.4148,
118920
+ "step": 98440
118921
+ },
118922
+ {
118923
+ "epoch": 18.17,
118924
+ "learning_rate": 1.0134423069967546e-05,
118925
+ "loss": 0.4012,
118926
+ "step": 98445
118927
+ },
118928
+ {
118929
+ "epoch": 18.17,
118930
+ "learning_rate": 1.0133580204539879e-05,
118931
+ "loss": 0.4096,
118932
+ "step": 98450
118933
+ },
118934
+ {
118935
+ "epoch": 18.17,
118936
+ "learning_rate": 1.0132739988272434e-05,
118937
+ "loss": 0.4123,
118938
+ "step": 98455
118939
+ },
118940
+ {
118941
+ "epoch": 18.17,
118942
+ "learning_rate": 1.0131902421185933e-05,
118943
+ "loss": 0.4108,
118944
+ "step": 98460
118945
+ },
118946
+ {
118947
+ "epoch": 18.17,
118948
+ "learning_rate": 1.0131067503301058e-05,
118949
+ "loss": 0.4102,
118950
+ "step": 98465
118951
+ },
118952
+ {
118953
+ "epoch": 18.17,
118954
+ "learning_rate": 1.0130235234638395e-05,
118955
+ "loss": 0.4018,
118956
+ "step": 98470
118957
+ },
118958
+ {
118959
+ "epoch": 18.17,
118960
+ "learning_rate": 1.0129405615218487e-05,
118961
+ "loss": 0.4055,
118962
+ "step": 98475
118963
+ },
118964
+ {
118965
+ "epoch": 18.17,
118966
+ "learning_rate": 1.0128578645061798e-05,
118967
+ "loss": 0.4092,
118968
+ "step": 98480
118969
+ },
118970
+ {
118971
+ "epoch": 18.17,
118972
+ "learning_rate": 1.0127754324188729e-05,
118973
+ "loss": 0.4109,
118974
+ "step": 98485
118975
+ },
118976
+ {
118977
+ "epoch": 18.17,
118978
+ "learning_rate": 1.0126932652619638e-05,
118979
+ "loss": 0.4084,
118980
+ "step": 98490
118981
+ },
118982
+ {
118983
+ "epoch": 18.18,
118984
+ "learning_rate": 1.0126113630374782e-05,
118985
+ "loss": 0.4056,
118986
+ "step": 98495
118987
+ },
118988
+ {
118989
+ "epoch": 18.18,
118990
+ "learning_rate": 1.0125297257474363e-05,
118991
+ "loss": 0.4108,
118992
+ "step": 98500
118993
+ },
118994
+ {
118995
+ "epoch": 18.18,
118996
+ "learning_rate": 1.0124483533938553e-05,
118997
+ "loss": 0.4039,
118998
+ "step": 98505
118999
+ },
119000
+ {
119001
+ "epoch": 18.18,
119002
+ "learning_rate": 1.0123672459787401e-05,
119003
+ "loss": 0.4064,
119004
+ "step": 98510
119005
+ },
119006
+ {
119007
+ "epoch": 18.18,
119008
+ "learning_rate": 1.0122864035040937e-05,
119009
+ "loss": 0.4131,
119010
+ "step": 98515
119011
+ },
119012
+ {
119013
+ "epoch": 18.18,
119014
+ "learning_rate": 1.01220582597191e-05,
119015
+ "loss": 0.4057,
119016
+ "step": 98520
119017
+ },
119018
+ {
119019
+ "epoch": 18.18,
119020
+ "learning_rate": 1.0121255133841766e-05,
119021
+ "loss": 0.4069,
119022
+ "step": 98525
119023
+ },
119024
+ {
119025
+ "epoch": 18.18,
119026
+ "learning_rate": 1.0120454657428767e-05,
119027
+ "loss": 0.4016,
119028
+ "step": 98530
119029
+ },
119030
+ {
119031
+ "epoch": 18.18,
119032
+ "learning_rate": 1.0119656830499845e-05,
119033
+ "loss": 0.4096,
119034
+ "step": 98535
119035
+ },
119036
+ {
119037
+ "epoch": 18.18,
119038
+ "learning_rate": 1.011886165307468e-05,
119039
+ "loss": 0.4103,
119040
+ "step": 98540
119041
+ },
119042
+ {
119043
+ "epoch": 18.19,
119044
+ "learning_rate": 1.0118069125172907e-05,
119045
+ "loss": 0.4084,
119046
+ "step": 98545
119047
+ },
119048
+ {
119049
+ "epoch": 18.19,
119050
+ "learning_rate": 1.0117279246814072e-05,
119051
+ "loss": 0.4096,
119052
+ "step": 98550
119053
+ },
119054
+ {
119055
+ "epoch": 18.19,
119056
+ "learning_rate": 1.0116492018017667e-05,
119057
+ "loss": 0.404,
119058
+ "step": 98555
119059
+ },
119060
+ {
119061
+ "epoch": 18.19,
119062
+ "learning_rate": 1.0115707438803099e-05,
119063
+ "loss": 0.4102,
119064
+ "step": 98560
119065
+ },
119066
+ {
119067
+ "epoch": 18.19,
119068
+ "learning_rate": 1.0114925509189759e-05,
119069
+ "loss": 0.4064,
119070
+ "step": 98565
119071
+ },
119072
+ {
119073
+ "epoch": 18.19,
119074
+ "learning_rate": 1.0114146229196912e-05,
119075
+ "loss": 0.4036,
119076
+ "step": 98570
119077
+ },
119078
+ {
119079
+ "epoch": 18.19,
119080
+ "learning_rate": 1.0113369598843809e-05,
119081
+ "loss": 0.4089,
119082
+ "step": 98575
119083
+ },
119084
+ {
119085
+ "epoch": 18.19,
119086
+ "learning_rate": 1.0112595618149581e-05,
119087
+ "loss": 0.4079,
119088
+ "step": 98580
119089
+ },
119090
+ {
119091
+ "epoch": 18.19,
119092
+ "learning_rate": 1.0111824287133362e-05,
119093
+ "loss": 0.4104,
119094
+ "step": 98585
119095
+ },
119096
+ {
119097
+ "epoch": 18.19,
119098
+ "learning_rate": 1.011105560581416e-05,
119099
+ "loss": 0.4072,
119100
+ "step": 98590
119101
+ },
119102
+ {
119103
+ "epoch": 18.19,
119104
+ "learning_rate": 1.0110289574210955e-05,
119105
+ "loss": 0.4052,
119106
+ "step": 98595
119107
+ },
119108
+ {
119109
+ "epoch": 18.2,
119110
+ "learning_rate": 1.0109526192342637e-05,
119111
+ "loss": 0.4058,
119112
+ "step": 98600
119113
+ },
119114
+ {
119115
+ "epoch": 18.2,
119116
+ "learning_rate": 1.0108765460228043e-05,
119117
+ "loss": 0.4105,
119118
+ "step": 98605
119119
+ },
119120
+ {
119121
+ "epoch": 18.2,
119122
+ "learning_rate": 1.0108007377885944e-05,
119123
+ "loss": 0.4057,
119124
+ "step": 98610
119125
+ },
119126
+ {
119127
+ "epoch": 18.2,
119128
+ "learning_rate": 1.0107251945335054e-05,
119129
+ "loss": 0.4056,
119130
+ "step": 98615
119131
+ },
119132
+ {
119133
+ "epoch": 18.2,
119134
+ "learning_rate": 1.0106499162593993e-05,
119135
+ "loss": 0.4074,
119136
+ "step": 98620
119137
+ },
119138
+ {
119139
+ "epoch": 18.2,
119140
+ "learning_rate": 1.0105749029681366e-05,
119141
+ "loss": 0.4061,
119142
+ "step": 98625
119143
+ },
119144
+ {
119145
+ "epoch": 18.2,
119146
+ "learning_rate": 1.0105001546615648e-05,
119147
+ "loss": 0.4065,
119148
+ "step": 98630
119149
+ },
119150
+ {
119151
+ "epoch": 18.2,
119152
+ "learning_rate": 1.0104256713415303e-05,
119153
+ "loss": 0.4138,
119154
+ "step": 98635
119155
+ },
119156
+ {
119157
+ "epoch": 18.2,
119158
+ "learning_rate": 1.0103514530098711e-05,
119159
+ "loss": 0.4169,
119160
+ "step": 98640
119161
+ },
119162
+ {
119163
+ "epoch": 18.2,
119164
+ "learning_rate": 1.0102774996684168e-05,
119165
+ "loss": 0.4108,
119166
+ "step": 98645
119167
+ },
119168
+ {
119169
+ "epoch": 18.21,
119170
+ "learning_rate": 1.0102038113189948e-05,
119171
+ "loss": 0.4073,
119172
+ "step": 98650
119173
+ },
119174
+ {
119175
+ "epoch": 18.21,
119176
+ "learning_rate": 1.01013038796342e-05,
119177
+ "loss": 0.4045,
119178
+ "step": 98655
119179
+ },
119180
+ {
119181
+ "epoch": 18.21,
119182
+ "learning_rate": 1.0100572296035055e-05,
119183
+ "loss": 0.4051,
119184
+ "step": 98660
119185
+ },
119186
+ {
119187
+ "epoch": 18.21,
119188
+ "learning_rate": 1.0099843362410572e-05,
119189
+ "loss": 0.4035,
119190
+ "step": 98665
119191
+ },
119192
+ {
119193
+ "epoch": 18.21,
119194
+ "learning_rate": 1.0099117078778724e-05,
119195
+ "loss": 0.4086,
119196
+ "step": 98670
119197
+ },
119198
+ {
119199
+ "epoch": 18.21,
119200
+ "learning_rate": 1.0098393445157448e-05,
119201
+ "loss": 0.4056,
119202
+ "step": 98675
119203
+ },
119204
+ {
119205
+ "epoch": 18.21,
119206
+ "learning_rate": 1.0097672461564575e-05,
119207
+ "loss": 0.4068,
119208
+ "step": 98680
119209
+ },
119210
+ {
119211
+ "epoch": 18.21,
119212
+ "learning_rate": 1.0096954128017914e-05,
119213
+ "loss": 0.4016,
119214
+ "step": 98685
119215
+ },
119216
+ {
119217
+ "epoch": 18.21,
119218
+ "learning_rate": 1.0096238444535187e-05,
119219
+ "loss": 0.4096,
119220
+ "step": 98690
119221
+ },
119222
+ {
119223
+ "epoch": 18.21,
119224
+ "learning_rate": 1.0095525411134045e-05,
119225
+ "loss": 0.4083,
119226
+ "step": 98695
119227
+ },
119228
+ {
119229
+ "epoch": 18.22,
119230
+ "learning_rate": 1.0094815027832091e-05,
119231
+ "loss": 0.4043,
119232
+ "step": 98700
119233
+ },
119234
+ {
119235
+ "epoch": 18.22,
119236
+ "learning_rate": 1.0094107294646838e-05,
119237
+ "loss": 0.4121,
119238
+ "step": 98705
119239
+ },
119240
+ {
119241
+ "epoch": 18.22,
119242
+ "learning_rate": 1.0093402211595765e-05,
119243
+ "loss": 0.4081,
119244
+ "step": 98710
119245
+ },
119246
+ {
119247
+ "epoch": 18.22,
119248
+ "learning_rate": 1.0092699778696253e-05,
119249
+ "loss": 0.401,
119250
+ "step": 98715
119251
+ },
119252
+ {
119253
+ "epoch": 18.22,
119254
+ "learning_rate": 1.0091999995965654e-05,
119255
+ "loss": 0.4035,
119256
+ "step": 98720
119257
+ },
119258
+ {
119259
+ "epoch": 18.22,
119260
+ "learning_rate": 1.0091302863421215e-05,
119261
+ "loss": 0.4106,
119262
+ "step": 98725
119263
+ },
119264
+ {
119265
+ "epoch": 18.22,
119266
+ "learning_rate": 1.0090608381080157e-05,
119267
+ "loss": 0.4083,
119268
+ "step": 98730
119269
+ },
119270
+ {
119271
+ "epoch": 18.22,
119272
+ "learning_rate": 1.0089916548959594e-05,
119273
+ "loss": 0.408,
119274
+ "step": 98735
119275
+ },
119276
+ {
119277
+ "epoch": 18.22,
119278
+ "learning_rate": 1.0089227367076605e-05,
119279
+ "loss": 0.4098,
119280
+ "step": 98740
119281
+ },
119282
+ {
119283
+ "epoch": 18.22,
119284
+ "learning_rate": 1.0088540835448211e-05,
119285
+ "loss": 0.4056,
119286
+ "step": 98745
119287
+ },
119288
+ {
119289
+ "epoch": 18.23,
119290
+ "learning_rate": 1.0087856954091324e-05,
119291
+ "loss": 0.4047,
119292
+ "step": 98750
119293
+ },
119294
+ {
119295
+ "epoch": 18.23,
119296
+ "learning_rate": 1.0087175723022828e-05,
119297
+ "loss": 0.4058,
119298
+ "step": 98755
119299
+ },
119300
+ {
119301
+ "epoch": 18.23,
119302
+ "learning_rate": 1.0086497142259544e-05,
119303
+ "loss": 0.4101,
119304
+ "step": 98760
119305
+ },
119306
+ {
119307
+ "epoch": 18.23,
119308
+ "learning_rate": 1.00858212118182e-05,
119309
+ "loss": 0.4065,
119310
+ "step": 98765
119311
+ },
119312
+ {
119313
+ "epoch": 18.23,
119314
+ "learning_rate": 1.0085147931715484e-05,
119315
+ "loss": 0.4072,
119316
+ "step": 98770
119317
+ },
119318
+ {
119319
+ "epoch": 18.23,
119320
+ "learning_rate": 1.0084477301967998e-05,
119321
+ "loss": 0.4091,
119322
+ "step": 98775
119323
+ },
119324
+ {
119325
+ "epoch": 18.23,
119326
+ "learning_rate": 1.0083809322592294e-05,
119327
+ "loss": 0.4107,
119328
+ "step": 98780
119329
+ },
119330
+ {
119331
+ "epoch": 18.23,
119332
+ "learning_rate": 1.0083143993604866e-05,
119333
+ "loss": 0.4122,
119334
+ "step": 98785
119335
+ },
119336
+ {
119337
+ "epoch": 18.23,
119338
+ "learning_rate": 1.0082481315022114e-05,
119339
+ "loss": 0.4057,
119340
+ "step": 98790
119341
+ },
119342
+ {
119343
+ "epoch": 18.23,
119344
+ "learning_rate": 1.0081821286860389e-05,
119345
+ "loss": 0.4054,
119346
+ "step": 98795
119347
+ },
119348
+ {
119349
+ "epoch": 18.23,
119350
+ "learning_rate": 1.0081163909135992e-05,
119351
+ "loss": 0.4039,
119352
+ "step": 98800
119353
+ },
119354
+ {
119355
+ "epoch": 18.24,
119356
+ "learning_rate": 1.0080509181865132e-05,
119357
+ "loss": 0.4037,
119358
+ "step": 98805
119359
+ },
119360
+ {
119361
+ "epoch": 18.24,
119362
+ "learning_rate": 1.0079857105063955e-05,
119363
+ "loss": 0.4078,
119364
+ "step": 98810
119365
+ },
119366
+ {
119367
+ "epoch": 18.24,
119368
+ "learning_rate": 1.0079207678748576e-05,
119369
+ "loss": 0.4079,
119370
+ "step": 98815
119371
+ },
119372
+ {
119373
+ "epoch": 18.24,
119374
+ "learning_rate": 1.0078560902934987e-05,
119375
+ "loss": 0.4112,
119376
+ "step": 98820
119377
+ },
119378
+ {
119379
+ "epoch": 18.24,
119380
+ "learning_rate": 1.0077916777639168e-05,
119381
+ "loss": 0.4096,
119382
+ "step": 98825
119383
+ },
119384
+ {
119385
+ "epoch": 18.24,
119386
+ "learning_rate": 1.0077275302877016e-05,
119387
+ "loss": 0.4044,
119388
+ "step": 98830
119389
+ },
119390
+ {
119391
+ "epoch": 18.24,
119392
+ "learning_rate": 1.0076636478664332e-05,
119393
+ "loss": 0.4104,
119394
+ "step": 98835
119395
+ },
119396
+ {
119397
+ "epoch": 18.24,
119398
+ "learning_rate": 1.007600030501691e-05,
119399
+ "loss": 0.4124,
119400
+ "step": 98840
119401
+ },
119402
+ {
119403
+ "epoch": 18.24,
119404
+ "learning_rate": 1.0075366781950428e-05,
119405
+ "loss": 0.4088,
119406
+ "step": 98845
119407
+ },
119408
+ {
119409
+ "epoch": 18.24,
119410
+ "learning_rate": 1.0074735909480522e-05,
119411
+ "loss": 0.4078,
119412
+ "step": 98850
119413
+ },
119414
+ {
119415
+ "epoch": 18.25,
119416
+ "learning_rate": 1.0074107687622768e-05,
119417
+ "loss": 0.4062,
119418
+ "step": 98855
119419
+ },
119420
+ {
119421
+ "epoch": 18.25,
119422
+ "learning_rate": 1.0073482116392646e-05,
119423
+ "loss": 0.4033,
119424
+ "step": 98860
119425
+ },
119426
+ {
119427
+ "epoch": 18.25,
119428
+ "learning_rate": 1.0072859195805609e-05,
119429
+ "loss": 0.4058,
119430
+ "step": 98865
119431
+ },
119432
+ {
119433
+ "epoch": 18.25,
119434
+ "learning_rate": 1.007223892587702e-05,
119435
+ "loss": 0.4087,
119436
+ "step": 98870
119437
+ },
119438
+ {
119439
+ "epoch": 18.25,
119440
+ "learning_rate": 1.0071621306622179e-05,
119441
+ "loss": 0.4071,
119442
+ "step": 98875
119443
+ },
119444
+ {
119445
+ "epoch": 18.25,
119446
+ "learning_rate": 1.0071006338056337e-05,
119447
+ "loss": 0.4073,
119448
+ "step": 98880
119449
+ },
119450
+ {
119451
+ "epoch": 18.25,
119452
+ "learning_rate": 1.0070394020194652e-05,
119453
+ "loss": 0.4113,
119454
+ "step": 98885
119455
+ },
119456
+ {
119457
+ "epoch": 18.25,
119458
+ "learning_rate": 1.0069784353052248e-05,
119459
+ "loss": 0.4056,
119460
+ "step": 98890
119461
+ },
119462
+ {
119463
+ "epoch": 18.25,
119464
+ "learning_rate": 1.0069177336644172e-05,
119465
+ "loss": 0.4058,
119466
+ "step": 98895
119467
+ },
119468
+ {
119469
+ "epoch": 18.25,
119470
+ "learning_rate": 1.006857297098538e-05,
119471
+ "loss": 0.4078,
119472
+ "step": 98900
119473
+ },
119474
+ {
119475
+ "epoch": 18.26,
119476
+ "learning_rate": 1.0067971256090789e-05,
119477
+ "loss": 0.4122,
119478
+ "step": 98905
119479
+ },
119480
+ {
119481
+ "epoch": 18.26,
119482
+ "learning_rate": 1.0067372191975258e-05,
119483
+ "loss": 0.4115,
119484
+ "step": 98910
119485
+ },
119486
+ {
119487
+ "epoch": 18.26,
119488
+ "learning_rate": 1.0066775778653566e-05,
119489
+ "loss": 0.4116,
119490
+ "step": 98915
119491
+ },
119492
+ {
119493
+ "epoch": 18.26,
119494
+ "learning_rate": 1.0066182016140424e-05,
119495
+ "loss": 0.4109,
119496
+ "step": 98920
119497
+ },
119498
+ {
119499
+ "epoch": 18.26,
119500
+ "learning_rate": 1.006559090445048e-05,
119501
+ "loss": 0.4067,
119502
+ "step": 98925
119503
+ },
119504
+ {
119505
+ "epoch": 18.26,
119506
+ "learning_rate": 1.0065002443598315e-05,
119507
+ "loss": 0.4049,
119508
+ "step": 98930
119509
+ },
119510
+ {
119511
+ "epoch": 18.26,
119512
+ "learning_rate": 1.006441663359847e-05,
119513
+ "loss": 0.4073,
119514
+ "step": 98935
119515
+ },
119516
+ {
119517
+ "epoch": 18.26,
119518
+ "learning_rate": 1.006383347446538e-05,
119519
+ "loss": 0.4077,
119520
+ "step": 98940
119521
+ },
119522
+ {
119523
+ "epoch": 18.26,
119524
+ "learning_rate": 1.0063252966213445e-05,
119525
+ "loss": 0.4094,
119526
+ "step": 98945
119527
+ },
119528
+ {
119529
+ "epoch": 18.26,
119530
+ "learning_rate": 1.006267510885697e-05,
119531
+ "loss": 0.4119,
119532
+ "step": 98950
119533
+ },
119534
+ {
119535
+ "epoch": 18.26,
119536
+ "learning_rate": 1.0062099902410232e-05,
119537
+ "loss": 0.4034,
119538
+ "step": 98955
119539
+ },
119540
+ {
119541
+ "epoch": 18.27,
119542
+ "learning_rate": 1.0061527346887421e-05,
119543
+ "loss": 0.4068,
119544
+ "step": 98960
119545
+ },
119546
+ {
119547
+ "epoch": 18.27,
119548
+ "learning_rate": 1.0060957442302668e-05,
119549
+ "loss": 0.4095,
119550
+ "step": 98965
119551
+ },
119552
+ {
119553
+ "epoch": 18.27,
119554
+ "learning_rate": 1.006039018867002e-05,
119555
+ "loss": 0.4047,
119556
+ "step": 98970
119557
+ },
119558
+ {
119559
+ "epoch": 18.27,
119560
+ "learning_rate": 1.0059825586003476e-05,
119561
+ "loss": 0.4051,
119562
+ "step": 98975
119563
+ },
119564
+ {
119565
+ "epoch": 18.27,
119566
+ "learning_rate": 1.0059263634316976e-05,
119567
+ "loss": 0.4065,
119568
+ "step": 98980
119569
+ },
119570
+ {
119571
+ "epoch": 18.27,
119572
+ "learning_rate": 1.0058704333624379e-05,
119573
+ "loss": 0.4063,
119574
+ "step": 98985
119575
+ },
119576
+ {
119577
+ "epoch": 18.27,
119578
+ "learning_rate": 1.005814768393949e-05,
119579
+ "loss": 0.4114,
119580
+ "step": 98990
119581
+ },
119582
+ {
119583
+ "epoch": 18.27,
119584
+ "learning_rate": 1.0057593685276049e-05,
119585
+ "loss": 0.4101,
119586
+ "step": 98995
119587
+ },
119588
+ {
119589
+ "epoch": 18.27,
119590
+ "learning_rate": 1.0057042337647708e-05,
119591
+ "loss": 0.4138,
119592
+ "step": 99000
119593
+ },
119594
+ {
119595
+ "epoch": 18.27,
119596
+ "eval_loss": 0.38938432931900024,
119597
+ "eval_runtime": 98.526,
119598
+ "eval_samples_per_second": 50.748,
119599
+ "eval_steps_per_second": 0.538,
119600
+ "step": 99000
119601
+ },
119602
+ {
119603
+ "epoch": 18.27,
119604
+ "learning_rate": 1.0056493641068084e-05,
119605
+ "loss": 0.4099,
119606
+ "step": 99005
119607
+ },
119608
+ {
119609
+ "epoch": 18.28,
119610
+ "learning_rate": 1.0055947595550713e-05,
119611
+ "loss": 0.4116,
119612
+ "step": 99010
119613
+ },
119614
+ {
119615
+ "epoch": 18.28,
119616
+ "learning_rate": 1.0055404201109082e-05,
119617
+ "loss": 0.4075,
119618
+ "step": 99015
119619
+ },
119620
+ {
119621
+ "epoch": 18.28,
119622
+ "learning_rate": 1.0054863457756567e-05,
119623
+ "loss": 0.4075,
119624
+ "step": 99020
119625
+ },
119626
+ {
119627
+ "epoch": 18.28,
119628
+ "learning_rate": 1.0054325365506526e-05,
119629
+ "loss": 0.4103,
119630
+ "step": 99025
119631
+ },
119632
+ {
119633
+ "epoch": 18.28,
119634
+ "learning_rate": 1.0053789924372251e-05,
119635
+ "loss": 0.4088,
119636
+ "step": 99030
119637
+ },
119638
+ {
119639
+ "epoch": 18.28,
119640
+ "learning_rate": 1.0053257134366935e-05,
119641
+ "loss": 0.4081,
119642
+ "step": 99035
119643
+ },
119644
+ {
119645
+ "epoch": 18.28,
119646
+ "learning_rate": 1.0052726995503727e-05,
119647
+ "loss": 0.4065,
119648
+ "step": 99040
119649
+ },
119650
+ {
119651
+ "epoch": 18.28,
119652
+ "learning_rate": 1.0052199507795707e-05,
119653
+ "loss": 0.4121,
119654
+ "step": 99045
119655
+ },
119656
+ {
119657
+ "epoch": 18.28,
119658
+ "learning_rate": 1.0051674671255899e-05,
119659
+ "loss": 0.409,
119660
+ "step": 99050
119661
+ },
119662
+ {
119663
+ "epoch": 18.28,
119664
+ "learning_rate": 1.0051152485897237e-05,
119665
+ "loss": 0.407,
119666
+ "step": 99055
119667
+ },
119668
+ {
119669
+ "epoch": 18.29,
119670
+ "learning_rate": 1.0050632951732626e-05,
119671
+ "loss": 0.4081,
119672
+ "step": 99060
119673
+ },
119674
+ {
119675
+ "epoch": 18.29,
119676
+ "learning_rate": 1.0050116068774868e-05,
119677
+ "loss": 0.4073,
119678
+ "step": 99065
119679
+ },
119680
+ {
119681
+ "epoch": 18.29,
119682
+ "learning_rate": 1.0049601837036726e-05,
119683
+ "loss": 0.4074,
119684
+ "step": 99070
119685
+ },
119686
+ {
119687
+ "epoch": 18.29,
119688
+ "learning_rate": 1.0049090256530885e-05,
119689
+ "loss": 0.4143,
119690
+ "step": 99075
119691
+ },
119692
+ {
119693
+ "epoch": 18.29,
119694
+ "learning_rate": 1.004858132726997e-05,
119695
+ "loss": 0.409,
119696
+ "step": 99080
119697
+ },
119698
+ {
119699
+ "epoch": 18.29,
119700
+ "learning_rate": 1.0048075049266527e-05,
119701
+ "loss": 0.4047,
119702
+ "step": 99085
119703
+ },
119704
+ {
119705
+ "epoch": 18.29,
119706
+ "learning_rate": 1.0047571422533064e-05,
119707
+ "loss": 0.4048,
119708
+ "step": 99090
119709
+ },
119710
+ {
119711
+ "epoch": 18.29,
119712
+ "learning_rate": 1.0047070447082003e-05,
119713
+ "loss": 0.4061,
119714
+ "step": 99095
119715
+ },
119716
+ {
119717
+ "epoch": 18.29,
119718
+ "learning_rate": 1.0046572122925699e-05,
119719
+ "loss": 0.4091,
119720
+ "step": 99100
119721
+ },
119722
+ {
119723
+ "epoch": 18.29,
119724
+ "learning_rate": 1.0046076450076455e-05,
119725
+ "loss": 0.4034,
119726
+ "step": 99105
119727
+ },
119728
+ {
119729
+ "epoch": 18.3,
119730
+ "learning_rate": 1.0045583428546493e-05,
119731
+ "loss": 0.4098,
119732
+ "step": 99110
119733
+ },
119734
+ {
119735
+ "epoch": 18.3,
119736
+ "learning_rate": 1.0045093058347987e-05,
119737
+ "loss": 0.4053,
119738
+ "step": 99115
119739
+ },
119740
+ {
119741
+ "epoch": 18.3,
119742
+ "learning_rate": 1.0044605339493029e-05,
119743
+ "loss": 0.4017,
119744
+ "step": 99120
119745
+ },
119746
+ {
119747
+ "epoch": 18.3,
119748
+ "learning_rate": 1.0044120271993656e-05,
119749
+ "loss": 0.4101,
119750
+ "step": 99125
119751
+ },
119752
+ {
119753
+ "epoch": 18.3,
119754
+ "learning_rate": 1.0043637855861832e-05,
119755
+ "loss": 0.4097,
119756
+ "step": 99130
119757
+ },
119758
+ {
119759
+ "epoch": 18.3,
119760
+ "learning_rate": 1.0043158091109465e-05,
119761
+ "loss": 0.4057,
119762
+ "step": 99135
119763
+ },
119764
+ {
119765
+ "epoch": 18.3,
119766
+ "learning_rate": 1.0042680977748395e-05,
119767
+ "loss": 0.4076,
119768
+ "step": 99140
119769
+ },
119770
+ {
119771
+ "epoch": 18.3,
119772
+ "learning_rate": 1.0042206515790391e-05,
119773
+ "loss": 0.4091,
119774
+ "step": 99145
119775
+ },
119776
+ {
119777
+ "epoch": 18.3,
119778
+ "learning_rate": 1.0041734705247159e-05,
119779
+ "loss": 0.4097,
119780
+ "step": 99150
119781
+ },
119782
+ {
119783
+ "epoch": 18.3,
119784
+ "learning_rate": 1.004126554613035e-05,
119785
+ "loss": 0.4043,
119786
+ "step": 99155
119787
+ },
119788
+ {
119789
+ "epoch": 18.3,
119790
+ "learning_rate": 1.0040799038451528e-05,
119791
+ "loss": 0.4049,
119792
+ "step": 99160
119793
+ },
119794
+ {
119795
+ "epoch": 18.31,
119796
+ "learning_rate": 1.0040335182222196e-05,
119797
+ "loss": 0.4086,
119798
+ "step": 99165
119799
+ },
119800
+ {
119801
+ "epoch": 18.31,
119802
+ "learning_rate": 1.0039873977453827e-05,
119803
+ "loss": 0.4102,
119804
+ "step": 99170
119805
+ },
119806
+ {
119807
+ "epoch": 18.31,
119808
+ "learning_rate": 1.003941542415777e-05,
119809
+ "loss": 0.4139,
119810
+ "step": 99175
119811
+ },
119812
+ {
119813
+ "epoch": 18.31,
119814
+ "learning_rate": 1.003895952234536e-05,
119815
+ "loss": 0.4056,
119816
+ "step": 99180
119817
+ },
119818
+ {
119819
+ "epoch": 18.31,
119820
+ "learning_rate": 1.0038506272027845e-05,
119821
+ "loss": 0.4038,
119822
+ "step": 99185
119823
+ },
119824
+ {
119825
+ "epoch": 18.31,
119826
+ "learning_rate": 1.0038055673216404e-05,
119827
+ "loss": 0.4081,
119828
+ "step": 99190
119829
+ },
119830
+ {
119831
+ "epoch": 18.31,
119832
+ "learning_rate": 1.0037607725922138e-05,
119833
+ "loss": 0.4074,
119834
+ "step": 99195
119835
+ },
119836
+ {
119837
+ "epoch": 18.31,
119838
+ "learning_rate": 1.0037162430156133e-05,
119839
+ "loss": 0.4054,
119840
+ "step": 99200
119841
+ },
119842
+ {
119843
+ "epoch": 18.31,
119844
+ "learning_rate": 1.0036719785929348e-05,
119845
+ "loss": 0.406,
119846
+ "step": 99205
119847
+ },
119848
+ {
119849
+ "epoch": 18.31,
119850
+ "learning_rate": 1.0036279793252725e-05,
119851
+ "loss": 0.4024,
119852
+ "step": 99210
119853
+ },
119854
+ {
119855
+ "epoch": 18.32,
119856
+ "learning_rate": 1.0035842452137103e-05,
119857
+ "loss": 0.4115,
119858
+ "step": 99215
119859
+ },
119860
+ {
119861
+ "epoch": 18.32,
119862
+ "learning_rate": 1.0035407762593293e-05,
119863
+ "loss": 0.4085,
119864
+ "step": 99220
119865
+ },
119866
+ {
119867
+ "epoch": 18.32,
119868
+ "learning_rate": 1.0034975724632e-05,
119869
+ "loss": 0.4079,
119870
+ "step": 99225
119871
+ },
119872
+ {
119873
+ "epoch": 18.32,
119874
+ "learning_rate": 1.0034546338263884e-05,
119875
+ "loss": 0.4126,
119876
+ "step": 99230
119877
+ },
119878
+ {
119879
+ "epoch": 18.32,
119880
+ "learning_rate": 1.0034119603499567e-05,
119881
+ "loss": 0.4075,
119882
+ "step": 99235
119883
+ },
119884
+ {
119885
+ "epoch": 18.32,
119886
+ "learning_rate": 1.003369552034955e-05,
119887
+ "loss": 0.4106,
119888
+ "step": 99240
119889
+ },
119890
+ {
119891
+ "epoch": 18.32,
119892
+ "learning_rate": 1.0033274088824313e-05,
119893
+ "loss": 0.4061,
119894
+ "step": 99245
119895
+ },
119896
+ {
119897
+ "epoch": 18.32,
119898
+ "learning_rate": 1.0032855308934238e-05,
119899
+ "loss": 0.4057,
119900
+ "step": 99250
119901
+ },
119902
+ {
119903
+ "epoch": 18.32,
119904
+ "learning_rate": 1.0032439180689676e-05,
119905
+ "loss": 0.4143,
119906
+ "step": 99255
119907
+ },
119908
+ {
119909
+ "epoch": 18.32,
119910
+ "learning_rate": 1.0032025704100874e-05,
119911
+ "loss": 0.4075,
119912
+ "step": 99260
119913
+ },
119914
+ {
119915
+ "epoch": 18.33,
119916
+ "learning_rate": 1.0031614879178065e-05,
119917
+ "loss": 0.4065,
119918
+ "step": 99265
119919
+ },
119920
+ {
119921
+ "epoch": 18.33,
119922
+ "learning_rate": 1.0031206705931356e-05,
119923
+ "loss": 0.4065,
119924
+ "step": 99270
119925
+ },
119926
+ {
119927
+ "epoch": 18.33,
119928
+ "learning_rate": 1.003080118437083e-05,
119929
+ "loss": 0.4082,
119930
+ "step": 99275
119931
+ },
119932
+ {
119933
+ "epoch": 18.33,
119934
+ "learning_rate": 1.0030398314506504e-05,
119935
+ "loss": 0.4102,
119936
+ "step": 99280
119937
+ },
119938
+ {
119939
+ "epoch": 18.33,
119940
+ "learning_rate": 1.0029998096348292e-05,
119941
+ "loss": 0.4099,
119942
+ "step": 99285
119943
+ },
119944
+ {
119945
+ "epoch": 18.33,
119946
+ "learning_rate": 1.0029600529906092e-05,
119947
+ "loss": 0.4104,
119948
+ "step": 99290
119949
+ },
119950
+ {
119951
+ "epoch": 18.33,
119952
+ "learning_rate": 1.00292056151897e-05,
119953
+ "loss": 0.4153,
119954
+ "step": 99295
119955
+ },
119956
+ {
119957
+ "epoch": 18.33,
119958
+ "learning_rate": 1.002881335220888e-05,
119959
+ "loss": 0.4129,
119960
+ "step": 99300
119961
+ },
119962
+ {
119963
+ "epoch": 18.33,
119964
+ "learning_rate": 1.0028423740973285e-05,
119965
+ "loss": 0.4081,
119966
+ "step": 99305
119967
+ },
119968
+ {
119969
+ "epoch": 18.33,
119970
+ "learning_rate": 1.0028036781492538e-05,
119971
+ "loss": 0.4113,
119972
+ "step": 99310
119973
+ },
119974
+ {
119975
+ "epoch": 18.33,
119976
+ "learning_rate": 1.0027652473776184e-05,
119977
+ "loss": 0.4107,
119978
+ "step": 99315
119979
+ },
119980
+ {
119981
+ "epoch": 18.34,
119982
+ "learning_rate": 1.0027270817833725e-05,
119983
+ "loss": 0.4021,
119984
+ "step": 99320
119985
+ },
119986
+ {
119987
+ "epoch": 18.34,
119988
+ "learning_rate": 1.0026891813674553e-05,
119989
+ "loss": 0.4101,
119990
+ "step": 99325
119991
+ },
119992
+ {
119993
+ "epoch": 18.34,
119994
+ "learning_rate": 1.0026515461308025e-05,
119995
+ "loss": 0.4071,
119996
+ "step": 99330
119997
+ },
119998
+ {
119999
+ "epoch": 18.34,
120000
+ "learning_rate": 1.0026141760743439e-05,
120001
+ "loss": 0.4101,
120002
+ "step": 99335
120003
+ },
120004
+ {
120005
+ "epoch": 18.34,
120006
+ "learning_rate": 1.0025770711990008e-05,
120007
+ "loss": 0.4055,
120008
+ "step": 99340
120009
+ },
120010
+ {
120011
+ "epoch": 18.34,
120012
+ "learning_rate": 1.0025402315056886e-05,
120013
+ "loss": 0.4078,
120014
+ "step": 99345
120015
+ },
120016
+ {
120017
+ "epoch": 18.34,
120018
+ "learning_rate": 1.0025036569953166e-05,
120019
+ "loss": 0.4069,
120020
+ "step": 99350
120021
+ },
120022
+ {
120023
+ "epoch": 18.34,
120024
+ "learning_rate": 1.0024673476687874e-05,
120025
+ "loss": 0.412,
120026
+ "step": 99355
120027
+ },
120028
+ {
120029
+ "epoch": 18.34,
120030
+ "learning_rate": 1.002431303526996e-05,
120031
+ "loss": 0.4085,
120032
+ "step": 99360
120033
+ },
120034
+ {
120035
+ "epoch": 18.34,
120036
+ "learning_rate": 1.0023955245708325e-05,
120037
+ "loss": 0.4087,
120038
+ "step": 99365
120039
+ },
120040
+ {
120041
+ "epoch": 18.35,
120042
+ "learning_rate": 1.0023600108011792e-05,
120043
+ "loss": 0.4044,
120044
+ "step": 99370
120045
+ },
120046
+ {
120047
+ "epoch": 18.35,
120048
+ "learning_rate": 1.0023247622189132e-05,
120049
+ "loss": 0.4055,
120050
+ "step": 99375
120051
+ },
120052
+ {
120053
+ "epoch": 18.35,
120054
+ "learning_rate": 1.0022897788249034e-05,
120055
+ "loss": 0.4082,
120056
+ "step": 99380
120057
+ },
120058
+ {
120059
+ "epoch": 18.35,
120060
+ "learning_rate": 1.0022550606200138e-05,
120061
+ "loss": 0.4089,
120062
+ "step": 99385
120063
+ },
120064
+ {
120065
+ "epoch": 18.35,
120066
+ "learning_rate": 1.0022206076051002e-05,
120067
+ "loss": 0.4116,
120068
+ "step": 99390
120069
+ },
120070
+ {
120071
+ "epoch": 18.35,
120072
+ "learning_rate": 1.0021864197810123e-05,
120073
+ "loss": 0.4054,
120074
+ "step": 99395
120075
+ },
120076
+ {
120077
+ "epoch": 18.35,
120078
+ "learning_rate": 1.0021524971485951e-05,
120079
+ "loss": 0.4038,
120080
+ "step": 99400
120081
+ },
120082
+ {
120083
+ "epoch": 18.35,
120084
+ "learning_rate": 1.0021188397086854e-05,
120085
+ "loss": 0.4054,
120086
+ "step": 99405
120087
+ },
120088
+ {
120089
+ "epoch": 18.35,
120090
+ "learning_rate": 1.0020854474621124e-05,
120091
+ "loss": 0.4094,
120092
+ "step": 99410
120093
+ },
120094
+ {
120095
+ "epoch": 18.35,
120096
+ "learning_rate": 1.0020523204097011e-05,
120097
+ "loss": 0.415,
120098
+ "step": 99415
120099
+ },
120100
+ {
120101
+ "epoch": 18.36,
120102
+ "learning_rate": 1.0020194585522688e-05,
120103
+ "loss": 0.4151,
120104
+ "step": 99420
120105
+ },
120106
+ {
120107
+ "epoch": 18.36,
120108
+ "learning_rate": 1.001986861890626e-05,
120109
+ "loss": 0.4111,
120110
+ "step": 99425
120111
+ },
120112
+ {
120113
+ "epoch": 18.36,
120114
+ "learning_rate": 1.001954530425577e-05,
120115
+ "loss": 0.4041,
120116
+ "step": 99430
120117
+ },
120118
+ {
120119
+ "epoch": 18.36,
120120
+ "learning_rate": 1.0019224641579193e-05,
120121
+ "loss": 0.4105,
120122
+ "step": 99435
120123
+ },
120124
+ {
120125
+ "epoch": 18.36,
120126
+ "learning_rate": 1.001890663088445e-05,
120127
+ "loss": 0.4002,
120128
+ "step": 99440
120129
+ },
120130
+ {
120131
+ "epoch": 18.36,
120132
+ "learning_rate": 1.0018591272179373e-05,
120133
+ "loss": 0.4093,
120134
+ "step": 99445
120135
+ },
120136
+ {
120137
+ "epoch": 18.36,
120138
+ "learning_rate": 1.0018278565471755e-05,
120139
+ "loss": 0.4067,
120140
+ "step": 99450
120141
+ },
120142
+ {
120143
+ "epoch": 18.36,
120144
+ "learning_rate": 1.001796851076932e-05,
120145
+ "loss": 0.4063,
120146
+ "step": 99455
120147
+ },
120148
+ {
120149
+ "epoch": 18.36,
120150
+ "learning_rate": 1.0017661108079689e-05,
120151
+ "loss": 0.411,
120152
+ "step": 99460
120153
+ },
120154
+ {
120155
+ "epoch": 18.36,
120156
+ "learning_rate": 1.0017356357410481e-05,
120157
+ "loss": 0.4074,
120158
+ "step": 99465
120159
+ },
120160
+ {
120161
+ "epoch": 18.37,
120162
+ "learning_rate": 1.0017054258769189e-05,
120163
+ "loss": 0.4023,
120164
+ "step": 99470
120165
+ },
120166
+ {
120167
+ "epoch": 18.37,
120168
+ "learning_rate": 1.0016754812163285e-05,
120169
+ "loss": 0.4112,
120170
+ "step": 99475
120171
+ },
120172
+ {
120173
+ "epoch": 18.37,
120174
+ "learning_rate": 1.0016458017600145e-05,
120175
+ "loss": 0.4056,
120176
+ "step": 99480
120177
+ },
120178
+ {
120179
+ "epoch": 18.37,
120180
+ "learning_rate": 1.0016163875087098e-05,
120181
+ "loss": 0.4081,
120182
+ "step": 99485
120183
+ },
120184
+ {
120185
+ "epoch": 18.37,
120186
+ "learning_rate": 1.0015872384631397e-05,
120187
+ "loss": 0.4075,
120188
+ "step": 99490
120189
+ },
120190
+ {
120191
+ "epoch": 18.37,
120192
+ "learning_rate": 1.0015583546240243e-05,
120193
+ "loss": 0.4126,
120194
+ "step": 99495
120195
+ },
120196
+ {
120197
+ "epoch": 18.37,
120198
+ "learning_rate": 1.0015297359920758e-05,
120199
+ "loss": 0.4031,
120200
+ "step": 99500
120201
+ },
120202
+ {
120203
+ "epoch": 18.37,
120204
+ "learning_rate": 1.0015013825679996e-05,
120205
+ "loss": 0.4064,
120206
+ "step": 99505
120207
+ },
120208
+ {
120209
+ "epoch": 18.37,
120210
+ "learning_rate": 1.0014732943524964e-05,
120211
+ "loss": 0.4032,
120212
+ "step": 99510
120213
+ },
120214
+ {
120215
+ "epoch": 18.37,
120216
+ "learning_rate": 1.0014454713462595e-05,
120217
+ "loss": 0.4062,
120218
+ "step": 99515
120219
+ },
120220
+ {
120221
+ "epoch": 18.37,
120222
+ "learning_rate": 1.0014179135499741e-05,
120223
+ "loss": 0.4057,
120224
+ "step": 99520
120225
+ },
120226
+ {
120227
+ "epoch": 18.38,
120228
+ "learning_rate": 1.0013906209643215e-05,
120229
+ "loss": 0.4094,
120230
+ "step": 99525
120231
+ },
120232
+ {
120233
+ "epoch": 18.38,
120234
+ "learning_rate": 1.0013635935899738e-05,
120235
+ "loss": 0.4036,
120236
+ "step": 99530
120237
+ },
120238
+ {
120239
+ "epoch": 18.38,
120240
+ "learning_rate": 1.0013368314275982e-05,
120241
+ "loss": 0.4058,
120242
+ "step": 99535
120243
+ },
120244
+ {
120245
+ "epoch": 18.38,
120246
+ "learning_rate": 1.0013103344778556e-05,
120247
+ "loss": 0.403,
120248
+ "step": 99540
120249
+ },
120250
+ {
120251
+ "epoch": 18.38,
120252
+ "learning_rate": 1.0012841027414003e-05,
120253
+ "loss": 0.406,
120254
+ "step": 99545
120255
+ },
120256
+ {
120257
+ "epoch": 18.38,
120258
+ "learning_rate": 1.0012581362188779e-05,
120259
+ "loss": 0.4118,
120260
+ "step": 99550
120261
+ },
120262
+ {
120263
+ "epoch": 18.38,
120264
+ "learning_rate": 1.0012324349109304e-05,
120265
+ "loss": 0.4054,
120266
+ "step": 99555
120267
+ },
120268
+ {
120269
+ "epoch": 18.38,
120270
+ "learning_rate": 1.0012069988181906e-05,
120271
+ "loss": 0.4096,
120272
+ "step": 99560
120273
+ },
120274
+ {
120275
+ "epoch": 18.38,
120276
+ "learning_rate": 1.0011818279412885e-05,
120277
+ "loss": 0.4102,
120278
+ "step": 99565
120279
+ },
120280
+ {
120281
+ "epoch": 18.38,
120282
+ "learning_rate": 1.0011569222808423e-05,
120283
+ "loss": 0.407,
120284
+ "step": 99570
120285
+ },
120286
+ {
120287
+ "epoch": 18.39,
120288
+ "learning_rate": 1.001132281837469e-05,
120289
+ "loss": 0.4094,
120290
+ "step": 99575
120291
+ },
120292
+ {
120293
+ "epoch": 18.39,
120294
+ "learning_rate": 1.0011079066117751e-05,
120295
+ "loss": 0.4093,
120296
+ "step": 99580
120297
+ },
120298
+ {
120299
+ "epoch": 18.39,
120300
+ "learning_rate": 1.001083796604363e-05,
120301
+ "loss": 0.4031,
120302
+ "step": 99585
120303
+ },
120304
+ {
120305
+ "epoch": 18.39,
120306
+ "learning_rate": 1.0010599518158261e-05,
120307
+ "loss": 0.4067,
120308
+ "step": 99590
120309
+ },
120310
+ {
120311
+ "epoch": 18.39,
120312
+ "learning_rate": 1.001036372246754e-05,
120313
+ "loss": 0.4115,
120314
+ "step": 99595
120315
+ },
120316
+ {
120317
+ "epoch": 18.39,
120318
+ "learning_rate": 1.0010130578977279e-05,
120319
+ "loss": 0.4164,
120320
+ "step": 99600
120321
+ },
120322
+ {
120323
+ "epoch": 18.39,
120324
+ "learning_rate": 1.000990008769324e-05,
120325
+ "loss": 0.4044,
120326
+ "step": 99605
120327
+ },
120328
+ {
120329
+ "epoch": 18.39,
120330
+ "learning_rate": 1.0009672248621096e-05,
120331
+ "loss": 0.4044,
120332
+ "step": 99610
120333
+ },
120334
+ {
120335
+ "epoch": 18.39,
120336
+ "learning_rate": 1.0009447061766477e-05,
120337
+ "loss": 0.4103,
120338
+ "step": 99615
120339
+ },
120340
+ {
120341
+ "epoch": 18.39,
120342
+ "learning_rate": 1.0009224527134947e-05,
120343
+ "loss": 0.4079,
120344
+ "step": 99620
120345
+ },
120346
+ {
120347
+ "epoch": 18.4,
120348
+ "learning_rate": 1.0009004644731984e-05,
120349
+ "loss": 0.4089,
120350
+ "step": 99625
120351
+ },
120352
+ {
120353
+ "epoch": 18.4,
120354
+ "learning_rate": 1.0008787414563016e-05,
120355
+ "loss": 0.4108,
120356
+ "step": 99630
120357
+ },
120358
+ {
120359
+ "epoch": 18.4,
120360
+ "learning_rate": 1.0008572836633405e-05,
120361
+ "loss": 0.4099,
120362
+ "step": 99635
120363
+ },
120364
+ {
120365
+ "epoch": 18.4,
120366
+ "learning_rate": 1.0008360910948447e-05,
120367
+ "loss": 0.4044,
120368
+ "step": 99640
120369
+ },
120370
+ {
120371
+ "epoch": 18.4,
120372
+ "learning_rate": 1.000815163751337e-05,
120373
+ "loss": 0.4017,
120374
+ "step": 99645
120375
+ },
120376
+ {
120377
+ "epoch": 18.4,
120378
+ "learning_rate": 1.0007945016333332e-05,
120379
+ "loss": 0.408,
120380
+ "step": 99650
120381
+ },
120382
+ {
120383
+ "epoch": 18.4,
120384
+ "learning_rate": 1.000774104741343e-05,
120385
+ "loss": 0.4067,
120386
+ "step": 99655
120387
+ },
120388
+ {
120389
+ "epoch": 18.4,
120390
+ "learning_rate": 1.000753973075872e-05,
120391
+ "loss": 0.4058,
120392
+ "step": 99660
120393
+ },
120394
+ {
120395
+ "epoch": 18.4,
120396
+ "learning_rate": 1.0007341066374134e-05,
120397
+ "loss": 0.406,
120398
+ "step": 99665
120399
+ },
120400
+ {
120401
+ "epoch": 18.4,
120402
+ "learning_rate": 1.0007145054264601e-05,
120403
+ "loss": 0.4101,
120404
+ "step": 99670
120405
+ },
120406
+ {
120407
+ "epoch": 18.4,
120408
+ "learning_rate": 1.0006951694434954e-05,
120409
+ "loss": 0.4063,
120410
+ "step": 99675
120411
+ },
120412
+ {
120413
+ "epoch": 18.41,
120414
+ "learning_rate": 1.0006760986889943e-05,
120415
+ "loss": 0.41,
120416
+ "step": 99680
120417
+ },
120418
+ {
120419
+ "epoch": 18.41,
120420
+ "learning_rate": 1.0006572931634295e-05,
120421
+ "loss": 0.4072,
120422
+ "step": 99685
120423
+ },
120424
+ {
120425
+ "epoch": 18.41,
120426
+ "learning_rate": 1.0006387528672654e-05,
120427
+ "loss": 0.4118,
120428
+ "step": 99690
120429
+ },
120430
+ {
120431
+ "epoch": 18.41,
120432
+ "learning_rate": 1.0006204778009574e-05,
120433
+ "loss": 0.4164,
120434
+ "step": 99695
120435
+ },
120436
+ {
120437
+ "epoch": 18.41,
120438
+ "learning_rate": 1.0006024679649572e-05,
120439
+ "loss": 0.4075,
120440
+ "step": 99700
120441
+ },
120442
+ {
120443
+ "epoch": 18.41,
120444
+ "learning_rate": 1.000584723359711e-05,
120445
+ "loss": 0.4063,
120446
+ "step": 99705
120447
+ },
120448
+ {
120449
+ "epoch": 18.41,
120450
+ "learning_rate": 1.000567243985653e-05,
120451
+ "loss": 0.4074,
120452
+ "step": 99710
120453
+ },
120454
+ {
120455
+ "epoch": 18.41,
120456
+ "learning_rate": 1.0005500298432178e-05,
120457
+ "loss": 0.4118,
120458
+ "step": 99715
120459
+ },
120460
+ {
120461
+ "epoch": 18.41,
120462
+ "learning_rate": 1.0005330809328291e-05,
120463
+ "loss": 0.4088,
120464
+ "step": 99720
120465
+ },
120466
+ {
120467
+ "epoch": 18.41,
120468
+ "learning_rate": 1.0005163972549048e-05,
120469
+ "loss": 0.409,
120470
+ "step": 99725
120471
+ },
120472
+ {
120473
+ "epoch": 18.42,
120474
+ "learning_rate": 1.000499978809855e-05,
120475
+ "loss": 0.4069,
120476
+ "step": 99730
120477
+ },
120478
+ {
120479
+ "epoch": 18.42,
120480
+ "learning_rate": 1.0004838255980884e-05,
120481
+ "loss": 0.4042,
120482
+ "step": 99735
120483
+ },
120484
+ {
120485
+ "epoch": 18.42,
120486
+ "learning_rate": 1.0004679376200009e-05,
120487
+ "loss": 0.4142,
120488
+ "step": 99740
120489
+ },
120490
+ {
120491
+ "epoch": 18.42,
120492
+ "learning_rate": 1.0004523148759851e-05,
120493
+ "loss": 0.4096,
120494
+ "step": 99745
120495
+ },
120496
+ {
120497
+ "epoch": 18.42,
120498
+ "learning_rate": 1.0004369573664269e-05,
120499
+ "loss": 0.409,
120500
+ "step": 99750
120501
+ },
120502
+ {
120503
+ "epoch": 18.42,
120504
+ "learning_rate": 1.0004218650917053e-05,
120505
+ "loss": 0.4107,
120506
+ "step": 99755
120507
+ },
120508
+ {
120509
+ "epoch": 18.42,
120510
+ "learning_rate": 1.0004070380521919e-05,
120511
+ "loss": 0.4091,
120512
+ "step": 99760
120513
+ },
120514
+ {
120515
+ "epoch": 18.42,
120516
+ "learning_rate": 1.0003924762482518e-05,
120517
+ "loss": 0.4059,
120518
+ "step": 99765
120519
+ },
120520
+ {
120521
+ "epoch": 18.42,
120522
+ "learning_rate": 1.0003781796802478e-05,
120523
+ "loss": 0.4076,
120524
+ "step": 99770
120525
+ },
120526
+ {
120527
+ "epoch": 18.42,
120528
+ "learning_rate": 1.0003641483485282e-05,
120529
+ "loss": 0.4045,
120530
+ "step": 99775
120531
+ },
120532
+ {
120533
+ "epoch": 18.43,
120534
+ "learning_rate": 1.000350382253443e-05,
120535
+ "loss": 0.4117,
120536
+ "step": 99780
120537
+ },
120538
+ {
120539
+ "epoch": 18.43,
120540
+ "learning_rate": 1.0003368813953286e-05,
120541
+ "loss": 0.4119,
120542
+ "step": 99785
120543
+ },
120544
+ {
120545
+ "epoch": 18.43,
120546
+ "learning_rate": 1.0003236457745215e-05,
120547
+ "loss": 0.4067,
120548
+ "step": 99790
120549
+ },
120550
+ {
120551
+ "epoch": 18.43,
120552
+ "learning_rate": 1.0003106753913454e-05,
120553
+ "loss": 0.41,
120554
+ "step": 99795
120555
+ },
120556
+ {
120557
+ "epoch": 18.43,
120558
+ "learning_rate": 1.0002979702461211e-05,
120559
+ "loss": 0.4118,
120560
+ "step": 99800
120561
+ },
120562
+ {
120563
+ "epoch": 18.43,
120564
+ "learning_rate": 1.0002855303391625e-05,
120565
+ "loss": 0.4082,
120566
+ "step": 99805
120567
+ },
120568
+ {
120569
+ "epoch": 18.43,
120570
+ "learning_rate": 1.0002733556707777e-05,
120571
+ "loss": 0.4099,
120572
+ "step": 99810
120573
+ },
120574
+ {
120575
+ "epoch": 18.43,
120576
+ "learning_rate": 1.0002614462412648e-05,
120577
+ "loss": 0.4129,
120578
+ "step": 99815
120579
+ },
120580
+ {
120581
+ "epoch": 18.43,
120582
+ "learning_rate": 1.000249802050919e-05,
120583
+ "loss": 0.4092,
120584
+ "step": 99820
120585
+ },
120586
+ {
120587
+ "epoch": 18.43,
120588
+ "learning_rate": 1.0002384231000275e-05,
120589
+ "loss": 0.4107,
120590
+ "step": 99825
120591
+ },
120592
+ {
120593
+ "epoch": 18.44,
120594
+ "learning_rate": 1.0002273093888698e-05,
120595
+ "loss": 0.4083,
120596
+ "step": 99830
120597
+ },
120598
+ {
120599
+ "epoch": 18.44,
120600
+ "learning_rate": 1.0002164609177228e-05,
120601
+ "loss": 0.4097,
120602
+ "step": 99835
120603
+ },
120604
+ {
120605
+ "epoch": 18.44,
120606
+ "learning_rate": 1.0002058776868513e-05,
120607
+ "loss": 0.4085,
120608
+ "step": 99840
120609
+ },
120610
+ {
120611
+ "epoch": 18.44,
120612
+ "learning_rate": 1.0001955596965181e-05,
120613
+ "loss": 0.4072,
120614
+ "step": 99845
120615
+ },
120616
+ {
120617
+ "epoch": 18.44,
120618
+ "learning_rate": 1.0001855069469772e-05,
120619
+ "loss": 0.4096,
120620
+ "step": 99850
120621
+ },
120622
+ {
120623
+ "epoch": 18.44,
120624
+ "learning_rate": 1.000175719438478e-05,
120625
+ "loss": 0.4097,
120626
+ "step": 99855
120627
+ },
120628
+ {
120629
+ "epoch": 18.44,
120630
+ "learning_rate": 1.0001661971712595e-05,
120631
+ "loss": 0.4089,
120632
+ "step": 99860
120633
+ },
120634
+ {
120635
+ "epoch": 18.44,
120636
+ "learning_rate": 1.0001569401455578e-05,
120637
+ "loss": 0.4112,
120638
+ "step": 99865
120639
+ },
120640
+ {
120641
+ "epoch": 18.44,
120642
+ "learning_rate": 1.0001479483616021e-05,
120643
+ "loss": 0.4012,
120644
+ "step": 99870
120645
+ },
120646
+ {
120647
+ "epoch": 18.44,
120648
+ "learning_rate": 1.0001392218196143e-05,
120649
+ "loss": 0.4062,
120650
+ "step": 99875
120651
+ },
120652
+ {
120653
+ "epoch": 18.44,
120654
+ "learning_rate": 1.0001307605198083e-05,
120655
+ "loss": 0.4054,
120656
+ "step": 99880
120657
+ },
120658
+ {
120659
+ "epoch": 18.45,
120660
+ "learning_rate": 1.0001225644623938e-05,
120661
+ "loss": 0.4085,
120662
+ "step": 99885
120663
+ },
120664
+ {
120665
+ "epoch": 18.45,
120666
+ "learning_rate": 1.0001146336475726e-05,
120667
+ "loss": 0.4098,
120668
+ "step": 99890
120669
+ },
120670
+ {
120671
+ "epoch": 18.45,
120672
+ "learning_rate": 1.0001069680755418e-05,
120673
+ "loss": 0.4079,
120674
+ "step": 99895
120675
+ },
120676
+ {
120677
+ "epoch": 18.45,
120678
+ "learning_rate": 1.0000995677464887e-05,
120679
+ "loss": 0.4102,
120680
+ "step": 99900
120681
+ },
120682
+ {
120683
+ "epoch": 18.45,
120684
+ "learning_rate": 1.0000924326605957e-05,
120685
+ "loss": 0.4079,
120686
+ "step": 99905
120687
+ },
120688
+ {
120689
+ "epoch": 18.45,
120690
+ "learning_rate": 1.000085562818041e-05,
120691
+ "loss": 0.41,
120692
+ "step": 99910
120693
+ },
120694
+ {
120695
+ "epoch": 18.45,
120696
+ "learning_rate": 1.0000789582189913e-05,
120697
+ "loss": 0.405,
120698
+ "step": 99915
120699
+ },
120700
+ {
120701
+ "epoch": 18.45,
120702
+ "learning_rate": 1.0000726188636128e-05,
120703
+ "loss": 0.4091,
120704
+ "step": 99920
120705
+ },
120706
+ {
120707
+ "epoch": 18.45,
120708
+ "learning_rate": 1.0000665447520593e-05,
120709
+ "loss": 0.4043,
120710
+ "step": 99925
120711
+ },
120712
+ {
120713
+ "epoch": 18.45,
120714
+ "learning_rate": 1.0000607358844825e-05,
120715
+ "loss": 0.4068,
120716
+ "step": 99930
120717
+ },
120718
+ {
120719
+ "epoch": 18.46,
120720
+ "learning_rate": 1.0000551922610243e-05,
120721
+ "loss": 0.4049,
120722
+ "step": 99935
120723
+ },
120724
+ {
120725
+ "epoch": 18.46,
120726
+ "learning_rate": 1.000049913881822e-05,
120727
+ "loss": 0.4048,
120728
+ "step": 99940
120729
+ },
120730
+ {
120731
+ "epoch": 18.46,
120732
+ "learning_rate": 1.0000449007470055e-05,
120733
+ "loss": 0.412,
120734
+ "step": 99945
120735
+ },
120736
+ {
120737
+ "epoch": 18.46,
120738
+ "learning_rate": 1.0000401528566993e-05,
120739
+ "loss": 0.4076,
120740
+ "step": 99950
120741
+ },
120742
+ {
120743
+ "epoch": 18.46,
120744
+ "learning_rate": 1.00003567021102e-05,
120745
+ "loss": 0.4085,
120746
+ "step": 99955
120747
+ },
120748
+ {
120749
+ "epoch": 18.46,
120750
+ "learning_rate": 1.0000314528100778e-05,
120751
+ "loss": 0.405,
120752
+ "step": 99960
120753
+ },
120754
+ {
120755
+ "epoch": 18.46,
120756
+ "learning_rate": 1.0000275006539773e-05,
120757
+ "loss": 0.4089,
120758
+ "step": 99965
120759
+ },
120760
+ {
120761
+ "epoch": 18.46,
120762
+ "learning_rate": 1.0000238137428167e-05,
120763
+ "loss": 0.4059,
120764
+ "step": 99970
120765
+ },
120766
+ {
120767
+ "epoch": 18.46,
120768
+ "learning_rate": 1.0000203920766867e-05,
120769
+ "loss": 0.4062,
120770
+ "step": 99975
120771
+ },
120772
+ {
120773
+ "epoch": 18.46,
120774
+ "learning_rate": 1.0000172356556704e-05,
120775
+ "loss": 0.4075,
120776
+ "step": 99980
120777
+ },
120778
+ {
120779
+ "epoch": 18.47,
120780
+ "learning_rate": 1.000014344479847e-05,
120781
+ "loss": 0.4041,
120782
+ "step": 99985
120783
+ },
120784
+ {
120785
+ "epoch": 18.47,
120786
+ "learning_rate": 1.0000117185492867e-05,
120787
+ "loss": 0.4062,
120788
+ "step": 99990
120789
+ },
120790
+ {
120791
+ "epoch": 18.47,
120792
+ "learning_rate": 1.0000093578640555e-05,
120793
+ "loss": 0.4035,
120794
+ "step": 99995
120795
+ },
120796
+ {
120797
+ "epoch": 18.47,
120798
+ "learning_rate": 1.0000072624242102e-05,
120799
+ "loss": 0.408,
120800
+ "step": 100000
120801
+ },
120802
+ {
120803
+ "epoch": 18.47,
120804
+ "eval_loss": 0.38928771018981934,
120805
+ "eval_runtime": 99.578,
120806
+ "eval_samples_per_second": 50.212,
120807
+ "eval_steps_per_second": 0.532,
120808
+ "step": 100000
120809
  }
120810
  ],
120811
  "max_steps": 100000,
120812
  "num_train_epochs": 20,
120813
+ "total_flos": 1.0515635686869263e+22,
120814
  "trial_name": null,
120815
  "trial_params": null
120816
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5ba5c39390d379753ab42fe112a3e238b37bd25f743670804092f31c48fc72aa
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ece555ebd5e5c6fe697b5c0892a0069538c02673bcdea4dd5ba7a2fbb86221c
3
  size 449471589