TracyTank commited on
Commit
3c07ba1
1 Parent(s): f17c5a1

Training in progress, step 717, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:592b68605894e86d34ecaa987148ecc135ddcf3a398bf0de5ed019230fbe6686
3
  size 2145944
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9515a7002cfa7f67e6c746a7e59b868de77da444db2b95a7c9cb039f0922f08
3
  size 2145944
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c979173d7f6ff65d48bd761a4f4e3036f97e9501eee7d10c03f95c65f9aca46
3
  size 4310020
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7971e0a79864672f8071d25876b2985e088af6d6ac063d1237778d2eaf4fd19b
3
  size 4310020
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f17d198c6bc92e13ab30588aabe29f9d4ef9d544c31a712f88bda72bb645002
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b753a7aacbc56b237a570162811e1a71ee10b02e1bee93e6daab70cd4680802
3
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e4d8d936c55bd98fb6e06d6d2e9ea8b77b7477b4be838f2900625a1f2e54a247
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1fb5420c0f25e5f6b2392bd3436352035eb0debaec1745bf48bdc44f8008fc2
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6452ba32beb2d62c78d0fb1076f7eab59418140c36ab55958a74d3a304076a7d
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2c82e31e1ac17c78c3d52136bae19d297a1c91d03f90b220e02fe92fb6cac45
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:517271d63e08e411e99f60e5080a584f8c12830073484d75a5dd81129034601d
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1705ef3a08286669695bd3c71729dc6c292514e0dbba93fac60bc2587d6740cc
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c45a9042c7f8d6bf5fc1d662ce3fcd2027723874b3c4568f6f59a481cd03e9b1
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27ebf93c877c54673a5776fbfe0e2f8ca99926be562e6e5f55038ce1f0e80b42
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.659537771645395,
5
  "eval_steps": 500,
6
- "global_step": 478,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3353,6 +3353,1679 @@
3353
  "learning_rate": 3.368986053895655e-05,
3354
  "loss": 9.7648,
3355
  "step": 478
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3356
  }
3357
  ],
3358
  "logging_steps": 1,
@@ -3372,7 +5045,7 @@
3372
  "attributes": {}
3373
  }
3374
  },
3375
- "total_flos": 396899546824704.0,
3376
  "train_batch_size": 4,
3377
  "trial_name": null,
3378
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9893066574680924,
5
  "eval_steps": 500,
6
+ "global_step": 717,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3353
  "learning_rate": 3.368986053895655e-05,
3354
  "loss": 9.7648,
3355
  "step": 478
3356
+ },
3357
+ {
3358
+ "epoch": 0.6609175577785443,
3359
+ "grad_norm": 1.1496732234954834,
3360
+ "learning_rate": 3.345210775911166e-05,
3361
+ "loss": 9.7602,
3362
+ "step": 479
3363
+ },
3364
+ {
3365
+ "epoch": 0.6622973439116937,
3366
+ "grad_norm": 1.1272746324539185,
3367
+ "learning_rate": 3.3214774421669774e-05,
3368
+ "loss": 9.7809,
3369
+ "step": 480
3370
+ },
3371
+ {
3372
+ "epoch": 0.6636771300448431,
3373
+ "grad_norm": 1.2115309238433838,
3374
+ "learning_rate": 3.297786654236169e-05,
3375
+ "loss": 9.7554,
3376
+ "step": 481
3377
+ },
3378
+ {
3379
+ "epoch": 0.6650569161779925,
3380
+ "grad_norm": 1.1891155242919922,
3381
+ "learning_rate": 3.2741390126134106e-05,
3382
+ "loss": 9.766,
3383
+ "step": 482
3384
+ },
3385
+ {
3386
+ "epoch": 0.6664367023111418,
3387
+ "grad_norm": 1.2283670902252197,
3388
+ "learning_rate": 3.250535116699729e-05,
3389
+ "loss": 9.7619,
3390
+ "step": 483
3391
+ },
3392
+ {
3393
+ "epoch": 0.6678164884442911,
3394
+ "grad_norm": 1.259109377861023,
3395
+ "learning_rate": 3.226975564787322e-05,
3396
+ "loss": 9.7414,
3397
+ "step": 484
3398
+ },
3399
+ {
3400
+ "epoch": 0.6691962745774405,
3401
+ "grad_norm": 1.322930932044983,
3402
+ "learning_rate": 3.2034609540443915e-05,
3403
+ "loss": 9.7465,
3404
+ "step": 485
3405
+ },
3406
+ {
3407
+ "epoch": 0.6705760607105898,
3408
+ "grad_norm": 1.3455944061279297,
3409
+ "learning_rate": 3.179991880500009e-05,
3410
+ "loss": 9.7221,
3411
+ "step": 486
3412
+ },
3413
+ {
3414
+ "epoch": 0.6719558468437392,
3415
+ "grad_norm": 1.5582841634750366,
3416
+ "learning_rate": 3.1565689390290066e-05,
3417
+ "loss": 9.6821,
3418
+ "step": 487
3419
+ },
3420
+ {
3421
+ "epoch": 0.6733356329768886,
3422
+ "grad_norm": 1.1807150840759277,
3423
+ "learning_rate": 3.133192723336895e-05,
3424
+ "loss": 9.7562,
3425
+ "step": 488
3426
+ },
3427
+ {
3428
+ "epoch": 0.674715419110038,
3429
+ "grad_norm": 0.9975462555885315,
3430
+ "learning_rate": 3.109863825944823e-05,
3431
+ "loss": 9.7919,
3432
+ "step": 489
3433
+ },
3434
+ {
3435
+ "epoch": 0.6760952052431873,
3436
+ "grad_norm": 1.0711045265197754,
3437
+ "learning_rate": 3.086582838174551e-05,
3438
+ "loss": 9.7813,
3439
+ "step": 490
3440
+ },
3441
+ {
3442
+ "epoch": 0.6774749913763367,
3443
+ "grad_norm": 1.1586861610412598,
3444
+ "learning_rate": 3.063350350133467e-05,
3445
+ "loss": 9.7722,
3446
+ "step": 491
3447
+ },
3448
+ {
3449
+ "epoch": 0.6788547775094861,
3450
+ "grad_norm": 1.228026270866394,
3451
+ "learning_rate": 3.0401669506996256e-05,
3452
+ "loss": 9.7511,
3453
+ "step": 492
3454
+ },
3455
+ {
3456
+ "epoch": 0.6802345636426353,
3457
+ "grad_norm": 1.205870270729065,
3458
+ "learning_rate": 3.0170332275068246e-05,
3459
+ "loss": 9.7468,
3460
+ "step": 493
3461
+ },
3462
+ {
3463
+ "epoch": 0.6816143497757847,
3464
+ "grad_norm": 1.283031702041626,
3465
+ "learning_rate": 2.9939497669297112e-05,
3466
+ "loss": 9.7567,
3467
+ "step": 494
3468
+ },
3469
+ {
3470
+ "epoch": 0.6829941359089341,
3471
+ "grad_norm": 1.2276543378829956,
3472
+ "learning_rate": 2.970917154068914e-05,
3473
+ "loss": 9.7648,
3474
+ "step": 495
3475
+ },
3476
+ {
3477
+ "epoch": 0.6843739220420835,
3478
+ "grad_norm": 1.2356555461883545,
3479
+ "learning_rate": 2.9479359727362173e-05,
3480
+ "loss": 9.7521,
3481
+ "step": 496
3482
+ },
3483
+ {
3484
+ "epoch": 0.6857537081752328,
3485
+ "grad_norm": 1.301045298576355,
3486
+ "learning_rate": 2.92500680543976e-05,
3487
+ "loss": 9.7518,
3488
+ "step": 497
3489
+ },
3490
+ {
3491
+ "epoch": 0.6871334943083822,
3492
+ "grad_norm": 1.424596905708313,
3493
+ "learning_rate": 2.9021302333692734e-05,
3494
+ "loss": 9.7215,
3495
+ "step": 498
3496
+ },
3497
+ {
3498
+ "epoch": 0.6885132804415316,
3499
+ "grad_norm": 1.409056305885315,
3500
+ "learning_rate": 2.879306836381345e-05,
3501
+ "loss": 9.7073,
3502
+ "step": 499
3503
+ },
3504
+ {
3505
+ "epoch": 0.689893066574681,
3506
+ "grad_norm": 1.6761122941970825,
3507
+ "learning_rate": 2.8565371929847284e-05,
3508
+ "loss": 9.6887,
3509
+ "step": 500
3510
+ },
3511
+ {
3512
+ "epoch": 0.6912728527078302,
3513
+ "grad_norm": 0.9244851469993591,
3514
+ "learning_rate": 2.8338218803256745e-05,
3515
+ "loss": 9.8076,
3516
+ "step": 501
3517
+ },
3518
+ {
3519
+ "epoch": 0.6926526388409796,
3520
+ "grad_norm": 1.1031217575073242,
3521
+ "learning_rate": 2.811161474173297e-05,
3522
+ "loss": 9.7844,
3523
+ "step": 502
3524
+ },
3525
+ {
3526
+ "epoch": 0.694032424974129,
3527
+ "grad_norm": 1.1576576232910156,
3528
+ "learning_rate": 2.7885565489049946e-05,
3529
+ "loss": 9.7647,
3530
+ "step": 503
3531
+ },
3532
+ {
3533
+ "epoch": 0.6954122111072784,
3534
+ "grad_norm": 1.1267688274383545,
3535
+ "learning_rate": 2.7660076774918708e-05,
3536
+ "loss": 9.786,
3537
+ "step": 504
3538
+ },
3539
+ {
3540
+ "epoch": 0.6967919972404277,
3541
+ "grad_norm": 1.165797472000122,
3542
+ "learning_rate": 2.7435154314842338e-05,
3543
+ "loss": 9.7736,
3544
+ "step": 505
3545
+ },
3546
+ {
3547
+ "epoch": 0.6981717833735771,
3548
+ "grad_norm": 1.1887449026107788,
3549
+ "learning_rate": 2.7210803809970853e-05,
3550
+ "loss": 9.7515,
3551
+ "step": 506
3552
+ },
3553
+ {
3554
+ "epoch": 0.6995515695067265,
3555
+ "grad_norm": 1.2904964685440063,
3556
+ "learning_rate": 2.6987030946956936e-05,
3557
+ "loss": 9.7602,
3558
+ "step": 507
3559
+ },
3560
+ {
3561
+ "epoch": 0.7009313556398759,
3562
+ "grad_norm": 1.2434381246566772,
3563
+ "learning_rate": 2.6763841397811573e-05,
3564
+ "loss": 9.7476,
3565
+ "step": 508
3566
+ },
3567
+ {
3568
+ "epoch": 0.7023111417730252,
3569
+ "grad_norm": 1.282375454902649,
3570
+ "learning_rate": 2.654124081976048e-05,
3571
+ "loss": 9.7474,
3572
+ "step": 509
3573
+ },
3574
+ {
3575
+ "epoch": 0.7036909279061745,
3576
+ "grad_norm": 1.3851263523101807,
3577
+ "learning_rate": 2.631923485510054e-05,
3578
+ "loss": 9.7208,
3579
+ "step": 510
3580
+ },
3581
+ {
3582
+ "epoch": 0.7050707140393239,
3583
+ "grad_norm": 1.4085386991500854,
3584
+ "learning_rate": 2.6097829131056907e-05,
3585
+ "loss": 9.7317,
3586
+ "step": 511
3587
+ },
3588
+ {
3589
+ "epoch": 0.7064505001724732,
3590
+ "grad_norm": 1.4174658060073853,
3591
+ "learning_rate": 2.587702925964034e-05,
3592
+ "loss": 9.7054,
3593
+ "step": 512
3594
+ },
3595
+ {
3596
+ "epoch": 0.7078302863056226,
3597
+ "grad_norm": 1.1571439504623413,
3598
+ "learning_rate": 2.565684083750488e-05,
3599
+ "loss": 9.7617,
3600
+ "step": 513
3601
+ },
3602
+ {
3603
+ "epoch": 0.709210072438772,
3604
+ "grad_norm": 1.0106278657913208,
3605
+ "learning_rate": 2.5437269445806145e-05,
3606
+ "loss": 9.7957,
3607
+ "step": 514
3608
+ },
3609
+ {
3610
+ "epoch": 0.7105898585719214,
3611
+ "grad_norm": 1.1029208898544312,
3612
+ "learning_rate": 2.5218320650059657e-05,
3613
+ "loss": 9.7729,
3614
+ "step": 515
3615
+ },
3616
+ {
3617
+ "epoch": 0.7119696447050707,
3618
+ "grad_norm": 1.134572148323059,
3619
+ "learning_rate": 2.500000000000001e-05,
3620
+ "loss": 9.7702,
3621
+ "step": 516
3622
+ },
3623
+ {
3624
+ "epoch": 0.7133494308382201,
3625
+ "grad_norm": 1.1734071969985962,
3626
+ "learning_rate": 2.478231302943997e-05,
3627
+ "loss": 9.782,
3628
+ "step": 517
3629
+ },
3630
+ {
3631
+ "epoch": 0.7147292169713695,
3632
+ "grad_norm": 1.220053791999817,
3633
+ "learning_rate": 2.4565265256130394e-05,
3634
+ "loss": 9.7545,
3635
+ "step": 518
3636
+ },
3637
+ {
3638
+ "epoch": 0.7161090031045187,
3639
+ "grad_norm": 1.2671356201171875,
3640
+ "learning_rate": 2.4348862181620224e-05,
3641
+ "loss": 9.7482,
3642
+ "step": 519
3643
+ },
3644
+ {
3645
+ "epoch": 0.7174887892376681,
3646
+ "grad_norm": 1.2364708185195923,
3647
+ "learning_rate": 2.4133109291117156e-05,
3648
+ "loss": 9.7619,
3649
+ "step": 520
3650
+ },
3651
+ {
3652
+ "epoch": 0.7188685753708175,
3653
+ "grad_norm": 1.251885175704956,
3654
+ "learning_rate": 2.391801205334856e-05,
3655
+ "loss": 9.7535,
3656
+ "step": 521
3657
+ },
3658
+ {
3659
+ "epoch": 0.7202483615039669,
3660
+ "grad_norm": 1.345314621925354,
3661
+ "learning_rate": 2.3703575920422795e-05,
3662
+ "loss": 9.7491,
3663
+ "step": 522
3664
+ },
3665
+ {
3666
+ "epoch": 0.7216281476371162,
3667
+ "grad_norm": 1.4312492609024048,
3668
+ "learning_rate": 2.3489806327691155e-05,
3669
+ "loss": 9.7244,
3670
+ "step": 523
3671
+ },
3672
+ {
3673
+ "epoch": 0.7230079337702656,
3674
+ "grad_norm": 1.4245972633361816,
3675
+ "learning_rate": 2.3276708693609943e-05,
3676
+ "loss": 9.7038,
3677
+ "step": 524
3678
+ },
3679
+ {
3680
+ "epoch": 0.724387719903415,
3681
+ "grad_norm": 1.5328452587127686,
3682
+ "learning_rate": 2.3064288419603292e-05,
3683
+ "loss": 9.6869,
3684
+ "step": 525
3685
+ },
3686
+ {
3687
+ "epoch": 0.7257675060365644,
3688
+ "grad_norm": 0.9019702672958374,
3689
+ "learning_rate": 2.2852550889926067e-05,
3690
+ "loss": 9.8176,
3691
+ "step": 526
3692
+ },
3693
+ {
3694
+ "epoch": 0.7271472921697137,
3695
+ "grad_norm": 1.0406774282455444,
3696
+ "learning_rate": 2.2641501471527595e-05,
3697
+ "loss": 9.7709,
3698
+ "step": 527
3699
+ },
3700
+ {
3701
+ "epoch": 0.728527078302863,
3702
+ "grad_norm": 1.0880190134048462,
3703
+ "learning_rate": 2.243114551391542e-05,
3704
+ "loss": 9.7687,
3705
+ "step": 528
3706
+ },
3707
+ {
3708
+ "epoch": 0.7299068644360124,
3709
+ "grad_norm": 1.1651344299316406,
3710
+ "learning_rate": 2.2221488349019903e-05,
3711
+ "loss": 9.7504,
3712
+ "step": 529
3713
+ },
3714
+ {
3715
+ "epoch": 0.7312866505691618,
3716
+ "grad_norm": 1.1736459732055664,
3717
+ "learning_rate": 2.20125352910589e-05,
3718
+ "loss": 9.7507,
3719
+ "step": 530
3720
+ },
3721
+ {
3722
+ "epoch": 0.7326664367023111,
3723
+ "grad_norm": 1.196784257888794,
3724
+ "learning_rate": 2.18042916364032e-05,
3725
+ "loss": 9.7769,
3726
+ "step": 531
3727
+ },
3728
+ {
3729
+ "epoch": 0.7340462228354605,
3730
+ "grad_norm": 1.2587947845458984,
3731
+ "learning_rate": 2.1596762663442218e-05,
3732
+ "loss": 9.7598,
3733
+ "step": 532
3734
+ },
3735
+ {
3736
+ "epoch": 0.7354260089686099,
3737
+ "grad_norm": 1.1886099576950073,
3738
+ "learning_rate": 2.1389953632450145e-05,
3739
+ "loss": 9.7561,
3740
+ "step": 533
3741
+ },
3742
+ {
3743
+ "epoch": 0.7368057951017593,
3744
+ "grad_norm": 1.3150951862335205,
3745
+ "learning_rate": 2.118386978545274e-05,
3746
+ "loss": 9.7332,
3747
+ "step": 534
3748
+ },
3749
+ {
3750
+ "epoch": 0.7381855812349086,
3751
+ "grad_norm": 1.3374465703964233,
3752
+ "learning_rate": 2.097851634609434e-05,
3753
+ "loss": 9.7546,
3754
+ "step": 535
3755
+ },
3756
+ {
3757
+ "epoch": 0.739565367368058,
3758
+ "grad_norm": 1.3926987648010254,
3759
+ "learning_rate": 2.077389851950557e-05,
3760
+ "loss": 9.7265,
3761
+ "step": 536
3762
+ },
3763
+ {
3764
+ "epoch": 0.7409451535012073,
3765
+ "grad_norm": 1.4018597602844238,
3766
+ "learning_rate": 2.0570021492171264e-05,
3767
+ "loss": 9.7042,
3768
+ "step": 537
3769
+ },
3770
+ {
3771
+ "epoch": 0.7423249396343566,
3772
+ "grad_norm": 1.165271520614624,
3773
+ "learning_rate": 2.0366890431799167e-05,
3774
+ "loss": 9.7577,
3775
+ "step": 538
3776
+ },
3777
+ {
3778
+ "epoch": 0.743704725767506,
3779
+ "grad_norm": 1.0279182195663452,
3780
+ "learning_rate": 2.0164510487188798e-05,
3781
+ "loss": 9.7801,
3782
+ "step": 539
3783
+ },
3784
+ {
3785
+ "epoch": 0.7450845119006554,
3786
+ "grad_norm": 1.0791254043579102,
3787
+ "learning_rate": 1.996288678810105e-05,
3788
+ "loss": 9.7826,
3789
+ "step": 540
3790
+ },
3791
+ {
3792
+ "epoch": 0.7464642980338048,
3793
+ "grad_norm": 1.1344528198242188,
3794
+ "learning_rate": 1.976202444512813e-05,
3795
+ "loss": 9.7708,
3796
+ "step": 541
3797
+ },
3798
+ {
3799
+ "epoch": 0.7478440841669541,
3800
+ "grad_norm": 1.1656285524368286,
3801
+ "learning_rate": 1.9561928549563968e-05,
3802
+ "loss": 9.7673,
3803
+ "step": 542
3804
+ },
3805
+ {
3806
+ "epoch": 0.7492238703001035,
3807
+ "grad_norm": 1.1736102104187012,
3808
+ "learning_rate": 1.9362604173275268e-05,
3809
+ "loss": 9.7814,
3810
+ "step": 543
3811
+ },
3812
+ {
3813
+ "epoch": 0.7506036564332529,
3814
+ "grad_norm": 1.2199715375900269,
3815
+ "learning_rate": 1.9164056368572846e-05,
3816
+ "loss": 9.7803,
3817
+ "step": 544
3818
+ },
3819
+ {
3820
+ "epoch": 0.7519834425664023,
3821
+ "grad_norm": 1.196496844291687,
3822
+ "learning_rate": 1.8966290168083677e-05,
3823
+ "loss": 9.7406,
3824
+ "step": 545
3825
+ },
3826
+ {
3827
+ "epoch": 0.7533632286995515,
3828
+ "grad_norm": 1.2514899969100952,
3829
+ "learning_rate": 1.87693105846232e-05,
3830
+ "loss": 9.7567,
3831
+ "step": 546
3832
+ },
3833
+ {
3834
+ "epoch": 0.7547430148327009,
3835
+ "grad_norm": 1.3064500093460083,
3836
+ "learning_rate": 1.8573122611068406e-05,
3837
+ "loss": 9.7289,
3838
+ "step": 547
3839
+ },
3840
+ {
3841
+ "epoch": 0.7561228009658503,
3842
+ "grad_norm": 1.400439739227295,
3843
+ "learning_rate": 1.837773122023114e-05,
3844
+ "loss": 9.7237,
3845
+ "step": 548
3846
+ },
3847
+ {
3848
+ "epoch": 0.7575025870989996,
3849
+ "grad_norm": 1.3692952394485474,
3850
+ "learning_rate": 1.818314136473217e-05,
3851
+ "loss": 9.7226,
3852
+ "step": 549
3853
+ },
3854
+ {
3855
+ "epoch": 0.758882373232149,
3856
+ "grad_norm": 1.5424178838729858,
3857
+ "learning_rate": 1.7989357976875603e-05,
3858
+ "loss": 9.6992,
3859
+ "step": 550
3860
+ },
3861
+ {
3862
+ "epoch": 0.7602621593652984,
3863
+ "grad_norm": 0.8980839848518372,
3864
+ "learning_rate": 1.7796385968523815e-05,
3865
+ "loss": 9.811,
3866
+ "step": 551
3867
+ },
3868
+ {
3869
+ "epoch": 0.7616419454984478,
3870
+ "grad_norm": 0.9639766216278076,
3871
+ "learning_rate": 1.760423023097307e-05,
3872
+ "loss": 9.7805,
3873
+ "step": 552
3874
+ },
3875
+ {
3876
+ "epoch": 0.7630217316315971,
3877
+ "grad_norm": 1.0791096687316895,
3878
+ "learning_rate": 1.741289563482939e-05,
3879
+ "loss": 9.7779,
3880
+ "step": 553
3881
+ },
3882
+ {
3883
+ "epoch": 0.7644015177647465,
3884
+ "grad_norm": 1.1886143684387207,
3885
+ "learning_rate": 1.7222387029885268e-05,
3886
+ "loss": 9.7533,
3887
+ "step": 554
3888
+ },
3889
+ {
3890
+ "epoch": 0.7657813038978958,
3891
+ "grad_norm": 1.2121399641036987,
3892
+ "learning_rate": 1.703270924499656e-05,
3893
+ "loss": 9.756,
3894
+ "step": 555
3895
+ },
3896
+ {
3897
+ "epoch": 0.7671610900310452,
3898
+ "grad_norm": 1.2668524980545044,
3899
+ "learning_rate": 1.684386708796025e-05,
3900
+ "loss": 9.7412,
3901
+ "step": 556
3902
+ },
3903
+ {
3904
+ "epoch": 0.7685408761641945,
3905
+ "grad_norm": 1.212928056716919,
3906
+ "learning_rate": 1.665586534539246e-05,
3907
+ "loss": 9.7641,
3908
+ "step": 557
3909
+ },
3910
+ {
3911
+ "epoch": 0.7699206622973439,
3912
+ "grad_norm": 1.2839542627334595,
3913
+ "learning_rate": 1.646870878260721e-05,
3914
+ "loss": 9.7659,
3915
+ "step": 558
3916
+ },
3917
+ {
3918
+ "epoch": 0.7713004484304933,
3919
+ "grad_norm": 1.2510404586791992,
3920
+ "learning_rate": 1.6282402143495568e-05,
3921
+ "loss": 9.7529,
3922
+ "step": 559
3923
+ },
3924
+ {
3925
+ "epoch": 0.7726802345636427,
3926
+ "grad_norm": 1.3847991228103638,
3927
+ "learning_rate": 1.6096950150405454e-05,
3928
+ "loss": 9.7254,
3929
+ "step": 560
3930
+ },
3931
+ {
3932
+ "epoch": 0.774060020696792,
3933
+ "grad_norm": 1.3537993431091309,
3934
+ "learning_rate": 1.5912357504021937e-05,
3935
+ "loss": 9.7168,
3936
+ "step": 561
3937
+ },
3938
+ {
3939
+ "epoch": 0.7754398068299414,
3940
+ "grad_norm": 1.5678364038467407,
3941
+ "learning_rate": 1.5728628883248007e-05,
3942
+ "loss": 9.6773,
3943
+ "step": 562
3944
+ },
3945
+ {
3946
+ "epoch": 0.7768195929630907,
3947
+ "grad_norm": 1.173730731010437,
3948
+ "learning_rate": 1.554576894508613e-05,
3949
+ "loss": 9.7439,
3950
+ "step": 563
3951
+ },
3952
+ {
3953
+ "epoch": 0.77819937909624,
3954
+ "grad_norm": 1.0499186515808105,
3955
+ "learning_rate": 1.536378232452003e-05,
3956
+ "loss": 9.7698,
3957
+ "step": 564
3958
+ },
3959
+ {
3960
+ "epoch": 0.7795791652293894,
3961
+ "grad_norm": 1.048542857170105,
3962
+ "learning_rate": 1.5182673634397365e-05,
3963
+ "loss": 9.7965,
3964
+ "step": 565
3965
+ },
3966
+ {
3967
+ "epoch": 0.7809589513625388,
3968
+ "grad_norm": 1.149909496307373,
3969
+ "learning_rate": 1.5002447465312675e-05,
3970
+ "loss": 9.7774,
3971
+ "step": 566
3972
+ },
3973
+ {
3974
+ "epoch": 0.7823387374956882,
3975
+ "grad_norm": 1.1968557834625244,
3976
+ "learning_rate": 1.4823108385491124e-05,
3977
+ "loss": 9.7471,
3978
+ "step": 567
3979
+ },
3980
+ {
3981
+ "epoch": 0.7837185236288375,
3982
+ "grad_norm": 1.135711908340454,
3983
+ "learning_rate": 1.4644660940672627e-05,
3984
+ "loss": 9.7686,
3985
+ "step": 568
3986
+ },
3987
+ {
3988
+ "epoch": 0.7850983097619869,
3989
+ "grad_norm": 1.3147321939468384,
3990
+ "learning_rate": 1.4467109653996697e-05,
3991
+ "loss": 9.7424,
3992
+ "step": 569
3993
+ },
3994
+ {
3995
+ "epoch": 0.7864780958951363,
3996
+ "grad_norm": 1.1964170932769775,
3997
+ "learning_rate": 1.429045902588777e-05,
3998
+ "loss": 9.7611,
3999
+ "step": 570
4000
+ },
4001
+ {
4002
+ "epoch": 0.7878578820282857,
4003
+ "grad_norm": 1.376891016960144,
4004
+ "learning_rate": 1.4114713533941082e-05,
4005
+ "loss": 9.7149,
4006
+ "step": 571
4007
+ },
4008
+ {
4009
+ "epoch": 0.7892376681614349,
4010
+ "grad_norm": 1.3214612007141113,
4011
+ "learning_rate": 1.3939877632809278e-05,
4012
+ "loss": 9.7467,
4013
+ "step": 572
4014
+ },
4015
+ {
4016
+ "epoch": 0.7906174542945843,
4017
+ "grad_norm": 1.346970796585083,
4018
+ "learning_rate": 1.3765955754089383e-05,
4019
+ "loss": 9.7122,
4020
+ "step": 573
4021
+ },
4022
+ {
4023
+ "epoch": 0.7919972404277337,
4024
+ "grad_norm": 1.4399006366729736,
4025
+ "learning_rate": 1.3592952306210588e-05,
4026
+ "loss": 9.7232,
4027
+ "step": 574
4028
+ },
4029
+ {
4030
+ "epoch": 0.793377026560883,
4031
+ "grad_norm": 1.645202875137329,
4032
+ "learning_rate": 1.3420871674322394e-05,
4033
+ "loss": 9.6723,
4034
+ "step": 575
4035
+ },
4036
+ {
4037
+ "epoch": 0.7947568126940324,
4038
+ "grad_norm": 0.9395657181739807,
4039
+ "learning_rate": 1.3249718220183583e-05,
4040
+ "loss": 9.8069,
4041
+ "step": 576
4042
+ },
4043
+ {
4044
+ "epoch": 0.7961365988271818,
4045
+ "grad_norm": 1.003074049949646,
4046
+ "learning_rate": 1.3079496282051529e-05,
4047
+ "loss": 9.7872,
4048
+ "step": 577
4049
+ },
4050
+ {
4051
+ "epoch": 0.7975163849603312,
4052
+ "grad_norm": 1.1187260150909424,
4053
+ "learning_rate": 1.2910210174572346e-05,
4054
+ "loss": 9.7854,
4055
+ "step": 578
4056
+ },
4057
+ {
4058
+ "epoch": 0.7988961710934805,
4059
+ "grad_norm": 1.1820377111434937,
4060
+ "learning_rate": 1.2741864188671492e-05,
4061
+ "loss": 9.7445,
4062
+ "step": 579
4063
+ },
4064
+ {
4065
+ "epoch": 0.8002759572266299,
4066
+ "grad_norm": 1.135115146636963,
4067
+ "learning_rate": 1.257446259144494e-05,
4068
+ "loss": 9.743,
4069
+ "step": 580
4070
+ },
4071
+ {
4072
+ "epoch": 0.8016557433597792,
4073
+ "grad_norm": 1.2748380899429321,
4074
+ "learning_rate": 1.2408009626051137e-05,
4075
+ "loss": 9.7491,
4076
+ "step": 581
4077
+ },
4078
+ {
4079
+ "epoch": 0.8030355294929286,
4080
+ "grad_norm": 1.2434227466583252,
4081
+ "learning_rate": 1.2242509511603317e-05,
4082
+ "loss": 9.7696,
4083
+ "step": 582
4084
+ },
4085
+ {
4086
+ "epoch": 0.8044153156260779,
4087
+ "grad_norm": 1.251225233078003,
4088
+ "learning_rate": 1.2077966443062705e-05,
4089
+ "loss": 9.7418,
4090
+ "step": 583
4091
+ },
4092
+ {
4093
+ "epoch": 0.8057951017592273,
4094
+ "grad_norm": 1.2516130208969116,
4095
+ "learning_rate": 1.1914384591132044e-05,
4096
+ "loss": 9.7266,
4097
+ "step": 584
4098
+ },
4099
+ {
4100
+ "epoch": 0.8071748878923767,
4101
+ "grad_norm": 1.392945647239685,
4102
+ "learning_rate": 1.1751768102150007e-05,
4103
+ "loss": 9.7269,
4104
+ "step": 585
4105
+ },
4106
+ {
4107
+ "epoch": 0.808554674025526,
4108
+ "grad_norm": 1.510158896446228,
4109
+ "learning_rate": 1.159012109798598e-05,
4110
+ "loss": 9.7113,
4111
+ "step": 586
4112
+ },
4113
+ {
4114
+ "epoch": 0.8099344601586754,
4115
+ "grad_norm": 1.5267751216888428,
4116
+ "learning_rate": 1.1429447675935695e-05,
4117
+ "loss": 9.704,
4118
+ "step": 587
4119
+ },
4120
+ {
4121
+ "epoch": 0.8113142462918248,
4122
+ "grad_norm": 1.1726975440979004,
4123
+ "learning_rate": 1.1269751908617277e-05,
4124
+ "loss": 9.7488,
4125
+ "step": 588
4126
+ },
4127
+ {
4128
+ "epoch": 0.8126940324249742,
4129
+ "grad_norm": 0.9718952775001526,
4130
+ "learning_rate": 1.1111037843868094e-05,
4131
+ "loss": 9.7921,
4132
+ "step": 589
4133
+ },
4134
+ {
4135
+ "epoch": 0.8140738185581234,
4136
+ "grad_norm": 1.0413875579833984,
4137
+ "learning_rate": 1.0953309504642128e-05,
4138
+ "loss": 9.7741,
4139
+ "step": 590
4140
+ },
4141
+ {
4142
+ "epoch": 0.8154536046912728,
4143
+ "grad_norm": 1.1728605031967163,
4144
+ "learning_rate": 1.0796570888907953e-05,
4145
+ "loss": 9.7641,
4146
+ "step": 591
4147
+ },
4148
+ {
4149
+ "epoch": 0.8168333908244222,
4150
+ "grad_norm": 1.157538652420044,
4151
+ "learning_rate": 1.0640825969547496e-05,
4152
+ "loss": 9.7696,
4153
+ "step": 592
4154
+ },
4155
+ {
4156
+ "epoch": 0.8182131769575716,
4157
+ "grad_norm": 1.2358477115631104,
4158
+ "learning_rate": 1.0486078694255242e-05,
4159
+ "loss": 9.7469,
4160
+ "step": 593
4161
+ },
4162
+ {
4163
+ "epoch": 0.8195929630907209,
4164
+ "grad_norm": 1.2600133419036865,
4165
+ "learning_rate": 1.0332332985438248e-05,
4166
+ "loss": 9.747,
4167
+ "step": 594
4168
+ },
4169
+ {
4170
+ "epoch": 0.8209727492238703,
4171
+ "grad_norm": 1.2197083234786987,
4172
+ "learning_rate": 1.017959274011665e-05,
4173
+ "loss": 9.7532,
4174
+ "step": 595
4175
+ },
4176
+ {
4177
+ "epoch": 0.8223525353570197,
4178
+ "grad_norm": 1.3774051666259766,
4179
+ "learning_rate": 1.0027861829824952e-05,
4180
+ "loss": 9.7325,
4181
+ "step": 596
4182
+ },
4183
+ {
4184
+ "epoch": 0.8237323214901691,
4185
+ "grad_norm": 1.38478422164917,
4186
+ "learning_rate": 9.87714410051383e-06,
4187
+ "loss": 9.7306,
4188
+ "step": 597
4189
+ },
4190
+ {
4191
+ "epoch": 0.8251121076233184,
4192
+ "grad_norm": 1.432771921157837,
4193
+ "learning_rate": 9.7274433724527e-06,
4194
+ "loss": 9.7276,
4195
+ "step": 598
4196
+ },
4197
+ {
4198
+ "epoch": 0.8264918937564677,
4199
+ "grad_norm": 1.5427794456481934,
4200
+ "learning_rate": 9.578763440132883e-06,
4201
+ "loss": 9.7113,
4202
+ "step": 599
4203
+ },
4204
+ {
4205
+ "epoch": 0.8278716798896171,
4206
+ "grad_norm": 1.5743299722671509,
4207
+ "learning_rate": 9.431108072171346e-06,
4208
+ "loss": 9.6803,
4209
+ "step": 600
4210
+ },
4211
+ {
4212
+ "epoch": 0.8292514660227664,
4213
+ "grad_norm": 0.921704113483429,
4214
+ "learning_rate": 9.284481011215318e-06,
4215
+ "loss": 9.7834,
4216
+ "step": 601
4217
+ },
4218
+ {
4219
+ "epoch": 0.8306312521559158,
4220
+ "grad_norm": 1.0345510244369507,
4221
+ "learning_rate": 9.138885973847261e-06,
4222
+ "loss": 9.79,
4223
+ "step": 602
4224
+ },
4225
+ {
4226
+ "epoch": 0.8320110382890652,
4227
+ "grad_norm": 1.182210087776184,
4228
+ "learning_rate": 8.994326650490831e-06,
4229
+ "loss": 9.7632,
4230
+ "step": 603
4231
+ },
4232
+ {
4233
+ "epoch": 0.8333908244222146,
4234
+ "grad_norm": 1.1576429605484009,
4235
+ "learning_rate": 8.850806705317183e-06,
4236
+ "loss": 9.7689,
4237
+ "step": 604
4238
+ },
4239
+ {
4240
+ "epoch": 0.834770610555364,
4241
+ "grad_norm": 1.2053581476211548,
4242
+ "learning_rate": 8.708329776152224e-06,
4243
+ "loss": 9.7558,
4244
+ "step": 605
4245
+ },
4246
+ {
4247
+ "epoch": 0.8361503966885133,
4248
+ "grad_norm": 1.2201980352401733,
4249
+ "learning_rate": 8.566899474384299e-06,
4250
+ "loss": 9.7571,
4251
+ "step": 606
4252
+ },
4253
+ {
4254
+ "epoch": 0.8375301828216627,
4255
+ "grad_norm": 1.1964430809020996,
4256
+ "learning_rate": 8.426519384872733e-06,
4257
+ "loss": 9.7458,
4258
+ "step": 607
4259
+ },
4260
+ {
4261
+ "epoch": 0.838909968954812,
4262
+ "grad_norm": 1.321816086769104,
4263
+ "learning_rate": 8.287193065856935e-06,
4264
+ "loss": 9.7307,
4265
+ "step": 608
4266
+ },
4267
+ {
4268
+ "epoch": 0.8402897550879613,
4269
+ "grad_norm": 1.290749430656433,
4270
+ "learning_rate": 8.148924048866169e-06,
4271
+ "loss": 9.7321,
4272
+ "step": 609
4273
+ },
4274
+ {
4275
+ "epoch": 0.8416695412211107,
4276
+ "grad_norm": 1.353271722793579,
4277
+ "learning_rate": 8.011715838630107e-06,
4278
+ "loss": 9.7231,
4279
+ "step": 610
4280
+ },
4281
+ {
4282
+ "epoch": 0.8430493273542601,
4283
+ "grad_norm": 1.4000368118286133,
4284
+ "learning_rate": 7.875571912989938e-06,
4285
+ "loss": 9.7083,
4286
+ "step": 611
4287
+ },
4288
+ {
4289
+ "epoch": 0.8444291134874095,
4290
+ "grad_norm": 1.4400883913040161,
4291
+ "learning_rate": 7.740495722810271e-06,
4292
+ "loss": 9.7085,
4293
+ "step": 612
4294
+ },
4295
+ {
4296
+ "epoch": 0.8458088996205588,
4297
+ "grad_norm": 1.134522795677185,
4298
+ "learning_rate": 7.606490691891577e-06,
4299
+ "loss": 9.7551,
4300
+ "step": 613
4301
+ },
4302
+ {
4303
+ "epoch": 0.8471886857537082,
4304
+ "grad_norm": 1.0418236255645752,
4305
+ "learning_rate": 7.4735602168835236e-06,
4306
+ "loss": 9.7693,
4307
+ "step": 614
4308
+ },
4309
+ {
4310
+ "epoch": 0.8485684718868576,
4311
+ "grad_norm": 1.080651879310608,
4312
+ "learning_rate": 7.341707667198766e-06,
4313
+ "loss": 9.7787,
4314
+ "step": 615
4315
+ },
4316
+ {
4317
+ "epoch": 0.849948258020007,
4318
+ "grad_norm": 1.1420639753341675,
4319
+ "learning_rate": 7.21093638492763e-06,
4320
+ "loss": 9.7674,
4321
+ "step": 616
4322
+ },
4323
+ {
4324
+ "epoch": 0.8513280441531562,
4325
+ "grad_norm": 1.221085786819458,
4326
+ "learning_rate": 7.081249684753327e-06,
4327
+ "loss": 9.7556,
4328
+ "step": 617
4329
+ },
4330
+ {
4331
+ "epoch": 0.8527078302863056,
4332
+ "grad_norm": 1.2512654066085815,
4333
+ "learning_rate": 6.952650853867993e-06,
4334
+ "loss": 9.7305,
4335
+ "step": 618
4336
+ },
4337
+ {
4338
+ "epoch": 0.854087616419455,
4339
+ "grad_norm": 1.2443575859069824,
4340
+ "learning_rate": 6.825143151889357e-06,
4341
+ "loss": 9.7577,
4342
+ "step": 619
4343
+ },
4344
+ {
4345
+ "epoch": 0.8554674025526043,
4346
+ "grad_norm": 1.2522335052490234,
4347
+ "learning_rate": 6.698729810778065e-06,
4348
+ "loss": 9.7628,
4349
+ "step": 620
4350
+ },
4351
+ {
4352
+ "epoch": 0.8568471886857537,
4353
+ "grad_norm": 1.360615611076355,
4354
+ "learning_rate": 6.5734140347558536e-06,
4355
+ "loss": 9.7367,
4356
+ "step": 621
4357
+ },
4358
+ {
4359
+ "epoch": 0.8582269748189031,
4360
+ "grad_norm": 1.3139582872390747,
4361
+ "learning_rate": 6.449199000224221e-06,
4362
+ "loss": 9.732,
4363
+ "step": 622
4364
+ },
4365
+ {
4366
+ "epoch": 0.8596067609520525,
4367
+ "grad_norm": 1.3527382612228394,
4368
+ "learning_rate": 6.3260878556840375e-06,
4369
+ "loss": 9.7361,
4370
+ "step": 623
4371
+ },
4372
+ {
4373
+ "epoch": 0.8609865470852018,
4374
+ "grad_norm": 1.4311867952346802,
4375
+ "learning_rate": 6.204083721655607e-06,
4376
+ "loss": 9.706,
4377
+ "step": 624
4378
+ },
4379
+ {
4380
+ "epoch": 0.8623663332183512,
4381
+ "grad_norm": 1.6295082569122314,
4382
+ "learning_rate": 6.083189690599711e-06,
4383
+ "loss": 9.6803,
4384
+ "step": 625
4385
+ },
4386
+ {
4387
+ "epoch": 0.8637461193515005,
4388
+ "grad_norm": 0.9568197131156921,
4389
+ "learning_rate": 5.9634088268390784e-06,
4390
+ "loss": 9.7839,
4391
+ "step": 626
4392
+ },
4393
+ {
4394
+ "epoch": 0.8651259054846498,
4395
+ "grad_norm": 1.080190896987915,
4396
+ "learning_rate": 5.8447441664808424e-06,
4397
+ "loss": 9.7847,
4398
+ "step": 627
4399
+ },
4400
+ {
4401
+ "epoch": 0.8665056916177992,
4402
+ "grad_norm": 1.0810461044311523,
4403
+ "learning_rate": 5.727198717339511e-06,
4404
+ "loss": 9.7586,
4405
+ "step": 628
4406
+ },
4407
+ {
4408
+ "epoch": 0.8678854777509486,
4409
+ "grad_norm": 1.1114495992660522,
4410
+ "learning_rate": 5.610775458860718e-06,
4411
+ "loss": 9.7832,
4412
+ "step": 629
4413
+ },
4414
+ {
4415
+ "epoch": 0.869265263884098,
4416
+ "grad_norm": 1.2362908124923706,
4417
+ "learning_rate": 5.495477342045779e-06,
4418
+ "loss": 9.7565,
4419
+ "step": 630
4420
+ },
4421
+ {
4422
+ "epoch": 0.8706450500172473,
4423
+ "grad_norm": 1.2435539960861206,
4424
+ "learning_rate": 5.381307289376786e-06,
4425
+ "loss": 9.7561,
4426
+ "step": 631
4427
+ },
4428
+ {
4429
+ "epoch": 0.8720248361503967,
4430
+ "grad_norm": 1.3071742057800293,
4431
+ "learning_rate": 5.2682681947426375e-06,
4432
+ "loss": 9.7482,
4433
+ "step": 632
4434
+ },
4435
+ {
4436
+ "epoch": 0.8734046222835461,
4437
+ "grad_norm": 1.2518724203109741,
4438
+ "learning_rate": 5.156362923365588e-06,
4439
+ "loss": 9.7569,
4440
+ "step": 633
4441
+ },
4442
+ {
4443
+ "epoch": 0.8747844084166954,
4444
+ "grad_norm": 1.3225029706954956,
4445
+ "learning_rate": 5.045594311728707e-06,
4446
+ "loss": 9.7413,
4447
+ "step": 634
4448
+ },
4449
+ {
4450
+ "epoch": 0.8761641945498447,
4451
+ "grad_norm": 1.3300327062606812,
4452
+ "learning_rate": 4.93596516750392e-06,
4453
+ "loss": 9.7283,
4454
+ "step": 635
4455
+ },
4456
+ {
4457
+ "epoch": 0.8775439806829941,
4458
+ "grad_norm": 1.3841952085494995,
4459
+ "learning_rate": 4.827478269480895e-06,
4460
+ "loss": 9.7273,
4461
+ "step": 636
4462
+ },
4463
+ {
4464
+ "epoch": 0.8789237668161435,
4465
+ "grad_norm": 1.5817680358886719,
4466
+ "learning_rate": 4.720136367496536e-06,
4467
+ "loss": 9.671,
4468
+ "step": 637
4469
+ },
4470
+ {
4471
+ "epoch": 0.8803035529492929,
4472
+ "grad_norm": 1.1424212455749512,
4473
+ "learning_rate": 4.613942182365372e-06,
4474
+ "loss": 9.7741,
4475
+ "step": 638
4476
+ },
4477
+ {
4478
+ "epoch": 0.8816833390824422,
4479
+ "grad_norm": 1.0098145008087158,
4480
+ "learning_rate": 4.508898405810524e-06,
4481
+ "loss": 9.7891,
4482
+ "step": 639
4483
+ },
4484
+ {
4485
+ "epoch": 0.8830631252155916,
4486
+ "grad_norm": 1.0495284795761108,
4487
+ "learning_rate": 4.405007700395497e-06,
4488
+ "loss": 9.7944,
4489
+ "step": 640
4490
+ },
4491
+ {
4492
+ "epoch": 0.884442911348741,
4493
+ "grad_norm": 1.165906310081482,
4494
+ "learning_rate": 4.3022726994567105e-06,
4495
+ "loss": 9.773,
4496
+ "step": 641
4497
+ },
4498
+ {
4499
+ "epoch": 0.8858226974818904,
4500
+ "grad_norm": 1.1741012334823608,
4501
+ "learning_rate": 4.200696007036703e-06,
4502
+ "loss": 9.767,
4503
+ "step": 642
4504
+ },
4505
+ {
4506
+ "epoch": 0.8872024836150396,
4507
+ "grad_norm": 1.2205920219421387,
4508
+ "learning_rate": 4.100280197818207e-06,
4509
+ "loss": 9.7574,
4510
+ "step": 643
4511
+ },
4512
+ {
4513
+ "epoch": 0.888582269748189,
4514
+ "grad_norm": 1.2672699689865112,
4515
+ "learning_rate": 4.001027817058789e-06,
4516
+ "loss": 9.7688,
4517
+ "step": 644
4518
+ },
4519
+ {
4520
+ "epoch": 0.8899620558813384,
4521
+ "grad_norm": 1.2361886501312256,
4522
+ "learning_rate": 3.902941380526426e-06,
4523
+ "loss": 9.7636,
4524
+ "step": 645
4525
+ },
4526
+ {
4527
+ "epoch": 0.8913418420144877,
4528
+ "grad_norm": 1.322023868560791,
4529
+ "learning_rate": 3.8060233744356633e-06,
4530
+ "loss": 9.7413,
4531
+ "step": 646
4532
+ },
4533
+ {
4534
+ "epoch": 0.8927216281476371,
4535
+ "grad_norm": 1.3382331132888794,
4536
+ "learning_rate": 3.7102762553846536e-06,
4537
+ "loss": 9.7279,
4538
+ "step": 647
4539
+ },
4540
+ {
4541
+ "epoch": 0.8941014142807865,
4542
+ "grad_norm": 1.345215082168579,
4543
+ "learning_rate": 3.615702450292857e-06,
4544
+ "loss": 9.745,
4545
+ "step": 648
4546
+ },
4547
+ {
4548
+ "epoch": 0.8954812004139359,
4549
+ "grad_norm": 1.3454034328460693,
4550
+ "learning_rate": 3.5223043563395288e-06,
4551
+ "loss": 9.721,
4552
+ "step": 649
4553
+ },
4554
+ {
4555
+ "epoch": 0.8968609865470852,
4556
+ "grad_norm": 1.4947688579559326,
4557
+ "learning_rate": 3.4300843409029726e-06,
4558
+ "loss": 9.7119,
4559
+ "step": 650
4560
+ },
4561
+ {
4562
+ "epoch": 0.8982407726802346,
4563
+ "grad_norm": 0.9093929529190063,
4564
+ "learning_rate": 3.3390447415005077e-06,
4565
+ "loss": 9.8062,
4566
+ "step": 651
4567
+ },
4568
+ {
4569
+ "epoch": 0.8996205588133839,
4570
+ "grad_norm": 1.040328860282898,
4571
+ "learning_rate": 3.249187865729264e-06,
4572
+ "loss": 9.7664,
4573
+ "step": 652
4574
+ },
4575
+ {
4576
+ "epoch": 0.9010003449465332,
4577
+ "grad_norm": 1.1193190813064575,
4578
+ "learning_rate": 3.160515991207619e-06,
4579
+ "loss": 9.7838,
4580
+ "step": 653
4581
+ },
4582
+ {
4583
+ "epoch": 0.9023801310796826,
4584
+ "grad_norm": 1.14997136592865,
4585
+ "learning_rate": 3.0730313655175645e-06,
4586
+ "loss": 9.768,
4587
+ "step": 654
4588
+ },
4589
+ {
4590
+ "epoch": 0.903759917212832,
4591
+ "grad_norm": 1.1968692541122437,
4592
+ "learning_rate": 2.9867362061476276e-06,
4593
+ "loss": 9.7622,
4594
+ "step": 655
4595
+ },
4596
+ {
4597
+ "epoch": 0.9051397033459814,
4598
+ "grad_norm": 1.1504422426223755,
4599
+ "learning_rate": 2.901632700436757e-06,
4600
+ "loss": 9.7521,
4601
+ "step": 656
4602
+ },
4603
+ {
4604
+ "epoch": 0.9065194894791307,
4605
+ "grad_norm": 1.2824842929840088,
4606
+ "learning_rate": 2.8177230055188353e-06,
4607
+ "loss": 9.7633,
4608
+ "step": 657
4609
+ },
4610
+ {
4611
+ "epoch": 0.9078992756122801,
4612
+ "grad_norm": 1.2600338459014893,
4613
+ "learning_rate": 2.7350092482679836e-06,
4614
+ "loss": 9.7494,
4615
+ "step": 658
4616
+ },
4617
+ {
4618
+ "epoch": 0.9092790617454295,
4619
+ "grad_norm": 1.2912122011184692,
4620
+ "learning_rate": 2.653493525244721e-06,
4621
+ "loss": 9.7263,
4622
+ "step": 659
4623
+ },
4624
+ {
4625
+ "epoch": 0.9106588478785789,
4626
+ "grad_norm": 1.415069580078125,
4627
+ "learning_rate": 2.573177902642726e-06,
4628
+ "loss": 9.7187,
4629
+ "step": 660
4630
+ },
4631
+ {
4632
+ "epoch": 0.9120386340117281,
4633
+ "grad_norm": 1.3458380699157715,
4634
+ "learning_rate": 2.4940644162365522e-06,
4635
+ "loss": 9.7266,
4636
+ "step": 661
4637
+ },
4638
+ {
4639
+ "epoch": 0.9134184201448775,
4640
+ "grad_norm": 1.5353480577468872,
4641
+ "learning_rate": 2.416155071329973e-06,
4642
+ "loss": 9.6964,
4643
+ "step": 662
4644
+ },
4645
+ {
4646
+ "epoch": 0.9147982062780269,
4647
+ "grad_norm": 1.0634307861328125,
4648
+ "learning_rate": 2.339451842705187e-06,
4649
+ "loss": 9.7851,
4650
+ "step": 663
4651
+ },
4652
+ {
4653
+ "epoch": 0.9161779924111763,
4654
+ "grad_norm": 1.0400745868682861,
4655
+ "learning_rate": 2.2639566745727205e-06,
4656
+ "loss": 9.7713,
4657
+ "step": 664
4658
+ },
4659
+ {
4660
+ "epoch": 0.9175577785443256,
4661
+ "grad_norm": 1.134487509727478,
4662
+ "learning_rate": 2.189671480522204e-06,
4663
+ "loss": 9.7789,
4664
+ "step": 665
4665
+ },
4666
+ {
4667
+ "epoch": 0.918937564677475,
4668
+ "grad_norm": 1.1036391258239746,
4669
+ "learning_rate": 2.1165981434738026e-06,
4670
+ "loss": 9.7722,
4671
+ "step": 666
4672
+ },
4673
+ {
4674
+ "epoch": 0.9203173508106244,
4675
+ "grad_norm": 1.1182793378829956,
4676
+ "learning_rate": 2.0447385156305565e-06,
4677
+ "loss": 9.7933,
4678
+ "step": 667
4679
+ },
4680
+ {
4681
+ "epoch": 0.9216971369437738,
4682
+ "grad_norm": 1.2758280038833618,
4683
+ "learning_rate": 1.974094418431388e-06,
4684
+ "loss": 9.7561,
4685
+ "step": 668
4686
+ },
4687
+ {
4688
+ "epoch": 0.9230769230769231,
4689
+ "grad_norm": 1.2668101787567139,
4690
+ "learning_rate": 1.9046676425049315e-06,
4691
+ "loss": 9.7434,
4692
+ "step": 669
4693
+ },
4694
+ {
4695
+ "epoch": 0.9244567092100724,
4696
+ "grad_norm": 1.2824689149856567,
4697
+ "learning_rate": 1.8364599476241862e-06,
4698
+ "loss": 9.7397,
4699
+ "step": 670
4700
+ },
4701
+ {
4702
+ "epoch": 0.9258364953432218,
4703
+ "grad_norm": 1.2909704446792603,
4704
+ "learning_rate": 1.7694730626618583e-06,
4705
+ "loss": 9.7306,
4706
+ "step": 671
4707
+ },
4708
+ {
4709
+ "epoch": 0.9272162814763711,
4710
+ "grad_norm": 1.3211469650268555,
4711
+ "learning_rate": 1.70370868554659e-06,
4712
+ "loss": 9.7482,
4713
+ "step": 672
4714
+ },
4715
+ {
4716
+ "epoch": 0.9285960676095205,
4717
+ "grad_norm": 1.3531687259674072,
4718
+ "learning_rate": 1.6391684832198718e-06,
4719
+ "loss": 9.7382,
4720
+ "step": 673
4721
+ },
4722
+ {
4723
+ "epoch": 0.9299758537426699,
4724
+ "grad_norm": 1.4724624156951904,
4725
+ "learning_rate": 1.5758540915938368e-06,
4726
+ "loss": 9.7051,
4727
+ "step": 674
4728
+ },
4729
+ {
4730
+ "epoch": 0.9313556398758193,
4731
+ "grad_norm": 1.6450470685958862,
4732
+ "learning_rate": 1.5137671155097565e-06,
4733
+ "loss": 9.6571,
4734
+ "step": 675
4735
+ },
4736
+ {
4737
+ "epoch": 0.9327354260089686,
4738
+ "grad_norm": 0.8748622536659241,
4739
+ "learning_rate": 1.4529091286973995e-06,
4740
+ "loss": 9.8211,
4741
+ "step": 676
4742
+ },
4743
+ {
4744
+ "epoch": 0.934115212142118,
4745
+ "grad_norm": 1.065389633178711,
4746
+ "learning_rate": 1.3932816737351085e-06,
4747
+ "loss": 9.7792,
4748
+ "step": 677
4749
+ },
4750
+ {
4751
+ "epoch": 0.9354949982752674,
4752
+ "grad_norm": 1.1271086931228638,
4753
+ "learning_rate": 1.3348862620107038e-06,
4754
+ "loss": 9.7635,
4755
+ "step": 678
4756
+ },
4757
+ {
4758
+ "epoch": 0.9368747844084166,
4759
+ "grad_norm": 1.126579999923706,
4760
+ "learning_rate": 1.27772437368322e-06,
4761
+ "loss": 9.7736,
4762
+ "step": 679
4763
+ },
4764
+ {
4765
+ "epoch": 0.938254570541566,
4766
+ "grad_norm": 1.1667218208312988,
4767
+ "learning_rate": 1.2217974576453073e-06,
4768
+ "loss": 9.7564,
4769
+ "step": 680
4770
+ },
4771
+ {
4772
+ "epoch": 0.9396343566747154,
4773
+ "grad_norm": 1.243585228919983,
4774
+ "learning_rate": 1.1671069314865834e-06,
4775
+ "loss": 9.7614,
4776
+ "step": 681
4777
+ },
4778
+ {
4779
+ "epoch": 0.9410141428078648,
4780
+ "grad_norm": 1.1980286836624146,
4781
+ "learning_rate": 1.1136541814576573e-06,
4782
+ "loss": 9.7782,
4783
+ "step": 682
4784
+ },
4785
+ {
4786
+ "epoch": 0.9423939289410141,
4787
+ "grad_norm": 1.2429455518722534,
4788
+ "learning_rate": 1.061440562435001e-06,
4789
+ "loss": 9.7618,
4790
+ "step": 683
4791
+ },
4792
+ {
4793
+ "epoch": 0.9437737150741635,
4794
+ "grad_norm": 1.306204080581665,
4795
+ "learning_rate": 1.0104673978866164e-06,
4796
+ "loss": 9.7464,
4797
+ "step": 684
4798
+ },
4799
+ {
4800
+ "epoch": 0.9451535012073129,
4801
+ "grad_norm": 1.3777480125427246,
4802
+ "learning_rate": 9.607359798384785e-07,
4803
+ "loss": 9.7229,
4804
+ "step": 685
4805
+ },
4806
+ {
4807
+ "epoch": 0.9465332873404623,
4808
+ "grad_norm": 1.3626405000686646,
4809
+ "learning_rate": 9.122475688417953e-07,
4810
+ "loss": 9.712,
4811
+ "step": 686
4812
+ },
4813
+ {
4814
+ "epoch": 0.9479130734736116,
4815
+ "grad_norm": 1.4798493385314941,
4816
+ "learning_rate": 8.650033939410385e-07,
4817
+ "loss": 9.7036,
4818
+ "step": 687
4819
+ },
4820
+ {
4821
+ "epoch": 0.9492928596067609,
4822
+ "grad_norm": 1.1905264854431152,
4823
+ "learning_rate": 8.190046526428242e-07,
4824
+ "loss": 9.7456,
4825
+ "step": 688
4826
+ },
4827
+ {
4828
+ "epoch": 0.9506726457399103,
4829
+ "grad_norm": 0.9911084175109863,
4830
+ "learning_rate": 7.742525108855148e-07,
4831
+ "loss": 9.7855,
4832
+ "step": 689
4833
+ },
4834
+ {
4835
+ "epoch": 0.9520524318730597,
4836
+ "grad_norm": 1.0882097482681274,
4837
+ "learning_rate": 7.307481030097152e-07,
4838
+ "loss": 9.7704,
4839
+ "step": 690
4840
+ },
4841
+ {
4842
+ "epoch": 0.953432218006209,
4843
+ "grad_norm": 1.1580933332443237,
4844
+ "learning_rate": 6.884925317294677e-07,
4845
+ "loss": 9.7733,
4846
+ "step": 691
4847
+ },
4848
+ {
4849
+ "epoch": 0.9548120041393584,
4850
+ "grad_norm": 1.1494512557983398,
4851
+ "learning_rate": 6.474868681043578e-07,
4852
+ "loss": 9.7834,
4853
+ "step": 692
4854
+ },
4855
+ {
4856
+ "epoch": 0.9561917902725078,
4857
+ "grad_norm": 1.1650831699371338,
4858
+ "learning_rate": 6.077321515123136e-07,
4859
+ "loss": 9.7708,
4860
+ "step": 693
4861
+ },
4862
+ {
4863
+ "epoch": 0.9575715764056572,
4864
+ "grad_norm": 1.2284369468688965,
4865
+ "learning_rate": 5.692293896232936e-07,
4866
+ "loss": 9.7564,
4867
+ "step": 694
4868
+ },
4869
+ {
4870
+ "epoch": 0.9589513625388065,
4871
+ "grad_norm": 1.2123266458511353,
4872
+ "learning_rate": 5.319795583737242e-07,
4873
+ "loss": 9.7493,
4874
+ "step": 695
4875
+ },
4876
+ {
4877
+ "epoch": 0.9603311486719558,
4878
+ "grad_norm": 1.2671525478363037,
4879
+ "learning_rate": 4.959836019417963e-07,
4880
+ "loss": 9.7491,
4881
+ "step": 696
4882
+ },
4883
+ {
4884
+ "epoch": 0.9617109348051052,
4885
+ "grad_norm": 1.2901546955108643,
4886
+ "learning_rate": 4.6124243272349653e-07,
4887
+ "loss": 9.745,
4888
+ "step": 697
4889
+ },
4890
+ {
4891
+ "epoch": 0.9630907209382545,
4892
+ "grad_norm": 1.3228528499603271,
4893
+ "learning_rate": 4.277569313094809e-07,
4894
+ "loss": 9.7245,
4895
+ "step": 698
4896
+ },
4897
+ {
4898
+ "epoch": 0.9644705070714039,
4899
+ "grad_norm": 1.4477964639663696,
4900
+ "learning_rate": 3.9552794646279255e-07,
4901
+ "loss": 9.7099,
4902
+ "step": 699
4903
+ },
4904
+ {
4905
+ "epoch": 0.9658502932045533,
4906
+ "grad_norm": 1.4955841302871704,
4907
+ "learning_rate": 3.6455629509730136e-07,
4908
+ "loss": 9.6912,
4909
+ "step": 700
4910
+ },
4911
+ {
4912
+ "epoch": 0.9672300793377027,
4913
+ "grad_norm": 0.8777285218238831,
4914
+ "learning_rate": 3.3484276225703714e-07,
4915
+ "loss": 9.8028,
4916
+ "step": 701
4917
+ },
4918
+ {
4919
+ "epoch": 0.968609865470852,
4920
+ "grad_norm": 1.04829740524292,
4921
+ "learning_rate": 3.0638810109626103e-07,
4922
+ "loss": 9.7746,
4923
+ "step": 702
4924
+ },
4925
+ {
4926
+ "epoch": 0.9699896516040014,
4927
+ "grad_norm": 1.18967604637146,
4928
+ "learning_rate": 2.79193032860392e-07,
4929
+ "loss": 9.7497,
4930
+ "step": 703
4931
+ },
4932
+ {
4933
+ "epoch": 0.9713694377371508,
4934
+ "grad_norm": 1.118967890739441,
4935
+ "learning_rate": 2.532582468677214e-07,
4936
+ "loss": 9.7728,
4937
+ "step": 704
4938
+ },
4939
+ {
4940
+ "epoch": 0.9727492238703,
4941
+ "grad_norm": 1.229331612586975,
4942
+ "learning_rate": 2.285844004919324e-07,
4943
+ "loss": 9.7464,
4944
+ "step": 705
4945
+ },
4946
+ {
4947
+ "epoch": 0.9741290100034494,
4948
+ "grad_norm": 1.1736466884613037,
4949
+ "learning_rate": 2.0517211914545254e-07,
4950
+ "loss": 9.7577,
4951
+ "step": 706
4952
+ },
4953
+ {
4954
+ "epoch": 0.9755087961365988,
4955
+ "grad_norm": 1.2047674655914307,
4956
+ "learning_rate": 1.8302199626358818e-07,
4957
+ "loss": 9.752,
4958
+ "step": 707
4959
+ },
4960
+ {
4961
+ "epoch": 0.9768885822697482,
4962
+ "grad_norm": 1.251903772354126,
4963
+ "learning_rate": 1.6213459328950352e-07,
4964
+ "loss": 9.7461,
4965
+ "step": 708
4966
+ },
4967
+ {
4968
+ "epoch": 0.9782683684028975,
4969
+ "grad_norm": 1.369354486465454,
4970
+ "learning_rate": 1.4251043965994304e-07,
4971
+ "loss": 9.7395,
4972
+ "step": 709
4973
+ },
4974
+ {
4975
+ "epoch": 0.9796481545360469,
4976
+ "grad_norm": 1.3757730722427368,
4977
+ "learning_rate": 1.2415003279186987e-07,
4978
+ "loss": 9.7186,
4979
+ "step": 710
4980
+ },
4981
+ {
4982
+ "epoch": 0.9810279406691963,
4983
+ "grad_norm": 1.3921722173690796,
4984
+ "learning_rate": 1.0705383806982606e-07,
4985
+ "loss": 9.7241,
4986
+ "step": 711
4987
+ },
4988
+ {
4989
+ "epoch": 0.9824077268023457,
4990
+ "grad_norm": 1.4798989295959473,
4991
+ "learning_rate": 9.12222888341252e-08,
4992
+ "loss": 9.7088,
4993
+ "step": 712
4994
+ },
4995
+ {
4996
+ "epoch": 0.983787512935495,
4997
+ "grad_norm": 1.149962067604065,
4998
+ "learning_rate": 7.665578636990579e-08,
4999
+ "loss": 9.7613,
5000
+ "step": 713
5001
+ },
5002
+ {
5003
+ "epoch": 0.9851672990686443,
5004
+ "grad_norm": 0.952008843421936,
5005
+ "learning_rate": 6.335469989692256e-08,
5006
+ "loss": 9.7976,
5007
+ "step": 714
5008
+ },
5009
+ {
5010
+ "epoch": 0.9865470852017937,
5011
+ "grad_norm": 1.064375400543213,
5012
+ "learning_rate": 5.131936656020409e-08,
5013
+ "loss": 9.7691,
5014
+ "step": 715
5015
+ },
5016
+ {
5017
+ "epoch": 0.9879268713349431,
5018
+ "grad_norm": 1.1418269872665405,
5019
+ "learning_rate": 4.055009142152067e-08,
5020
+ "loss": 9.7665,
5021
+ "step": 716
5022
+ },
5023
+ {
5024
+ "epoch": 0.9893066574680924,
5025
+ "grad_norm": 1.1035135984420776,
5026
+ "learning_rate": 3.10471474516183e-08,
5027
+ "loss": 9.782,
5028
+ "step": 717
5029
  }
5030
  ],
5031
  "logging_steps": 1,
 
5045
  "attributes": {}
5046
  }
5047
  },
5048
+ "total_flos": 595349320237056.0,
5049
  "train_batch_size": 4,
5050
  "trial_name": null,
5051
  "trial_params": null