schnell commited on
Commit
67ba99c
·
1 Parent(s): df76722

Training in progress, epoch 12

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2bc327d4614644ce779e33f71927d5983243713a86a3a5a09bd8a82a9c614b5b
3
  size 236470789
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1533d6bddf3c8d95f6ff4932752f745542fa6faede711ac4e5f78630fd44fdb
3
  size 236470789
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd672fa95725c90a4945d6b6f450c71cb23bdb758c11b8bb1978add5c38eb068
3
  size 118243218
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8e789cbff592c7abcbb70a0a1080069b2a642bdd41392da72c8a88aa5f570ae
3
  size 118243218
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d113bc86624edcea00ec618f70ea2de03d430916dce27d2c17335ab2d3f3bc6
3
  size 15597
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1555aa793e45869a50c0ea8fa30e8bcbcb3c7466ee570768b03040a237ce44ce
3
  size 15597
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c58cb95ff8d57433dba1b5d6978dad2ce1b4eb172542e2538ff2da225a2256d
3
  size 557
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2004c6163970935b37a492dabc7d20258e92b44d334d59d43006d4c3533be13
3
  size 557
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b3451bb6d6aefbf2710d312f0df2cf7c847dc0eab8b59b4f78791e8aadf73fa3
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c877a115c1cbc8e43174987b4b039efb0e758136a482d45ffc9deb2c3305746a
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 11.0,
5
- "global_step": 764203,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -9273,11 +9273,854 @@
9273
  "eval_samples_per_second": 414.54,
9274
  "eval_steps_per_second": 25.909,
9275
  "step": 764203
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9276
  }
9277
  ],
9278
  "max_steps": 972622,
9279
  "num_train_epochs": 14,
9280
- "total_flos": 5.785429495460291e+18,
9281
  "trial_name": null,
9282
  "trial_params": null
9283
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 12.0,
5
+ "global_step": 833676,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
9273
  "eval_samples_per_second": 414.54,
9274
  "eval_steps_per_second": 25.909,
9275
  "step": 764203
9276
+ },
9277
+ {
9278
+ "epoch": 11.0,
9279
+ "learning_rate": 2.165064726683595e-05,
9280
+ "loss": 1.7248,
9281
+ "step": 764500
9282
+ },
9283
+ {
9284
+ "epoch": 11.01,
9285
+ "learning_rate": 2.1598824378566722e-05,
9286
+ "loss": 1.7201,
9287
+ "step": 765000
9288
+ },
9289
+ {
9290
+ "epoch": 11.02,
9291
+ "learning_rate": 2.1546897636813983e-05,
9292
+ "loss": 1.7237,
9293
+ "step": 765500
9294
+ },
9295
+ {
9296
+ "epoch": 11.03,
9297
+ "learning_rate": 2.1494970895061248e-05,
9298
+ "loss": 1.724,
9299
+ "step": 766000
9300
+ },
9301
+ {
9302
+ "epoch": 11.03,
9303
+ "learning_rate": 2.1443044153308513e-05,
9304
+ "loss": 1.7209,
9305
+ "step": 766500
9306
+ },
9307
+ {
9308
+ "epoch": 11.04,
9309
+ "learning_rate": 2.1391117411555778e-05,
9310
+ "loss": 1.7219,
9311
+ "step": 767000
9312
+ },
9313
+ {
9314
+ "epoch": 11.05,
9315
+ "learning_rate": 2.1339190669803043e-05,
9316
+ "loss": 1.7215,
9317
+ "step": 767500
9318
+ },
9319
+ {
9320
+ "epoch": 11.05,
9321
+ "learning_rate": 2.1287263928050308e-05,
9322
+ "loss": 1.7206,
9323
+ "step": 768000
9324
+ },
9325
+ {
9326
+ "epoch": 11.06,
9327
+ "learning_rate": 2.1235441039781077e-05,
9328
+ "loss": 1.7198,
9329
+ "step": 768500
9330
+ },
9331
+ {
9332
+ "epoch": 11.07,
9333
+ "learning_rate": 2.1183514298028342e-05,
9334
+ "loss": 1.7244,
9335
+ "step": 769000
9336
+ },
9337
+ {
9338
+ "epoch": 11.08,
9339
+ "learning_rate": 2.1131587556275607e-05,
9340
+ "loss": 1.7213,
9341
+ "step": 769500
9342
+ },
9343
+ {
9344
+ "epoch": 11.08,
9345
+ "learning_rate": 2.1079660814522872e-05,
9346
+ "loss": 1.7236,
9347
+ "step": 770000
9348
+ },
9349
+ {
9350
+ "epoch": 11.09,
9351
+ "learning_rate": 2.1027837926253645e-05,
9352
+ "loss": 1.721,
9353
+ "step": 770500
9354
+ },
9355
+ {
9356
+ "epoch": 11.1,
9357
+ "learning_rate": 2.0975911184500907e-05,
9358
+ "loss": 1.7223,
9359
+ "step": 771000
9360
+ },
9361
+ {
9362
+ "epoch": 11.11,
9363
+ "learning_rate": 2.092398444274817e-05,
9364
+ "loss": 1.7238,
9365
+ "step": 771500
9366
+ },
9367
+ {
9368
+ "epoch": 11.11,
9369
+ "learning_rate": 2.0872057700995436e-05,
9370
+ "loss": 1.7226,
9371
+ "step": 772000
9372
+ },
9373
+ {
9374
+ "epoch": 11.12,
9375
+ "learning_rate": 2.08201309592427e-05,
9376
+ "loss": 1.7211,
9377
+ "step": 772500
9378
+ },
9379
+ {
9380
+ "epoch": 11.13,
9381
+ "learning_rate": 2.0768204217489966e-05,
9382
+ "loss": 1.7223,
9383
+ "step": 773000
9384
+ },
9385
+ {
9386
+ "epoch": 11.13,
9387
+ "learning_rate": 2.0716381329220736e-05,
9388
+ "loss": 1.7184,
9389
+ "step": 773500
9390
+ },
9391
+ {
9392
+ "epoch": 11.14,
9393
+ "learning_rate": 2.0664454587468e-05,
9394
+ "loss": 1.723,
9395
+ "step": 774000
9396
+ },
9397
+ {
9398
+ "epoch": 11.15,
9399
+ "learning_rate": 2.0612527845715266e-05,
9400
+ "loss": 1.7148,
9401
+ "step": 774500
9402
+ },
9403
+ {
9404
+ "epoch": 11.16,
9405
+ "learning_rate": 2.056060110396253e-05,
9406
+ "loss": 1.7249,
9407
+ "step": 775000
9408
+ },
9409
+ {
9410
+ "epoch": 11.16,
9411
+ "learning_rate": 2.05087782156933e-05,
9412
+ "loss": 1.7156,
9413
+ "step": 775500
9414
+ },
9415
+ {
9416
+ "epoch": 11.17,
9417
+ "learning_rate": 2.0456851473940565e-05,
9418
+ "loss": 1.7209,
9419
+ "step": 776000
9420
+ },
9421
+ {
9422
+ "epoch": 11.18,
9423
+ "learning_rate": 2.040492473218783e-05,
9424
+ "loss": 1.7263,
9425
+ "step": 776500
9426
+ },
9427
+ {
9428
+ "epoch": 11.18,
9429
+ "learning_rate": 2.0352997990435095e-05,
9430
+ "loss": 1.7217,
9431
+ "step": 777000
9432
+ },
9433
+ {
9434
+ "epoch": 11.19,
9435
+ "learning_rate": 2.0301175102165865e-05,
9436
+ "loss": 1.7213,
9437
+ "step": 777500
9438
+ },
9439
+ {
9440
+ "epoch": 11.2,
9441
+ "learning_rate": 2.024924836041313e-05,
9442
+ "loss": 1.7238,
9443
+ "step": 778000
9444
+ },
9445
+ {
9446
+ "epoch": 11.21,
9447
+ "learning_rate": 2.0197321618660394e-05,
9448
+ "loss": 1.7207,
9449
+ "step": 778500
9450
+ },
9451
+ {
9452
+ "epoch": 11.21,
9453
+ "learning_rate": 2.014539487690766e-05,
9454
+ "loss": 1.7236,
9455
+ "step": 779000
9456
+ },
9457
+ {
9458
+ "epoch": 11.22,
9459
+ "learning_rate": 2.0093468135154924e-05,
9460
+ "loss": 1.7216,
9461
+ "step": 779500
9462
+ },
9463
+ {
9464
+ "epoch": 11.23,
9465
+ "learning_rate": 2.0041645246885697e-05,
9466
+ "loss": 1.7229,
9467
+ "step": 780000
9468
+ },
9469
+ {
9470
+ "epoch": 11.23,
9471
+ "learning_rate": 1.998971850513296e-05,
9472
+ "loss": 1.7196,
9473
+ "step": 780500
9474
+ },
9475
+ {
9476
+ "epoch": 11.24,
9477
+ "learning_rate": 1.9937791763380224e-05,
9478
+ "loss": 1.7199,
9479
+ "step": 781000
9480
+ },
9481
+ {
9482
+ "epoch": 11.25,
9483
+ "learning_rate": 1.988586502162749e-05,
9484
+ "loss": 1.7186,
9485
+ "step": 781500
9486
+ },
9487
+ {
9488
+ "epoch": 11.26,
9489
+ "learning_rate": 1.9833938279874753e-05,
9490
+ "loss": 1.7208,
9491
+ "step": 782000
9492
+ },
9493
+ {
9494
+ "epoch": 11.26,
9495
+ "learning_rate": 1.9782011538122018e-05,
9496
+ "loss": 1.7177,
9497
+ "step": 782500
9498
+ },
9499
+ {
9500
+ "epoch": 11.27,
9501
+ "learning_rate": 1.9730084796369283e-05,
9502
+ "loss": 1.7212,
9503
+ "step": 783000
9504
+ },
9505
+ {
9506
+ "epoch": 11.28,
9507
+ "learning_rate": 1.9678158054616548e-05,
9508
+ "loss": 1.7219,
9509
+ "step": 783500
9510
+ },
9511
+ {
9512
+ "epoch": 11.28,
9513
+ "learning_rate": 1.9626335166347318e-05,
9514
+ "loss": 1.7191,
9515
+ "step": 784000
9516
+ },
9517
+ {
9518
+ "epoch": 11.29,
9519
+ "learning_rate": 1.9574512278078087e-05,
9520
+ "loss": 1.7213,
9521
+ "step": 784500
9522
+ },
9523
+ {
9524
+ "epoch": 11.3,
9525
+ "learning_rate": 1.9522585536325352e-05,
9526
+ "loss": 1.7227,
9527
+ "step": 785000
9528
+ },
9529
+ {
9530
+ "epoch": 11.31,
9531
+ "learning_rate": 1.9470658794572617e-05,
9532
+ "loss": 1.7204,
9533
+ "step": 785500
9534
+ },
9535
+ {
9536
+ "epoch": 11.31,
9537
+ "learning_rate": 1.9418732052819882e-05,
9538
+ "loss": 1.7223,
9539
+ "step": 786000
9540
+ },
9541
+ {
9542
+ "epoch": 11.32,
9543
+ "learning_rate": 1.9366805311067147e-05,
9544
+ "loss": 1.7194,
9545
+ "step": 786500
9546
+ },
9547
+ {
9548
+ "epoch": 11.33,
9549
+ "learning_rate": 1.9314878569314412e-05,
9550
+ "loss": 1.7205,
9551
+ "step": 787000
9552
+ },
9553
+ {
9554
+ "epoch": 11.34,
9555
+ "learning_rate": 1.9262951827561677e-05,
9556
+ "loss": 1.7195,
9557
+ "step": 787500
9558
+ },
9559
+ {
9560
+ "epoch": 11.34,
9561
+ "learning_rate": 1.9211128939292446e-05,
9562
+ "loss": 1.7208,
9563
+ "step": 788000
9564
+ },
9565
+ {
9566
+ "epoch": 11.35,
9567
+ "learning_rate": 1.915920219753971e-05,
9568
+ "loss": 1.7161,
9569
+ "step": 788500
9570
+ },
9571
+ {
9572
+ "epoch": 11.36,
9573
+ "learning_rate": 1.9107275455786976e-05,
9574
+ "loss": 1.7181,
9575
+ "step": 789000
9576
+ },
9577
+ {
9578
+ "epoch": 11.36,
9579
+ "learning_rate": 1.905534871403424e-05,
9580
+ "loss": 1.7177,
9581
+ "step": 789500
9582
+ },
9583
+ {
9584
+ "epoch": 11.37,
9585
+ "learning_rate": 1.9003421972281506e-05,
9586
+ "loss": 1.7162,
9587
+ "step": 790000
9588
+ },
9589
+ {
9590
+ "epoch": 11.38,
9591
+ "learning_rate": 1.8951599084012276e-05,
9592
+ "loss": 1.717,
9593
+ "step": 790500
9594
+ },
9595
+ {
9596
+ "epoch": 11.39,
9597
+ "learning_rate": 1.889967234225954e-05,
9598
+ "loss": 1.7229,
9599
+ "step": 791000
9600
+ },
9601
+ {
9602
+ "epoch": 11.39,
9603
+ "learning_rate": 1.8847745600506805e-05,
9604
+ "loss": 1.7197,
9605
+ "step": 791500
9606
+ },
9607
+ {
9608
+ "epoch": 11.4,
9609
+ "learning_rate": 1.879581885875407e-05,
9610
+ "loss": 1.7194,
9611
+ "step": 792000
9612
+ },
9613
+ {
9614
+ "epoch": 11.41,
9615
+ "learning_rate": 1.8743892117001335e-05,
9616
+ "loss": 1.7226,
9617
+ "step": 792500
9618
+ },
9619
+ {
9620
+ "epoch": 11.41,
9621
+ "learning_rate": 1.8692069228732105e-05,
9622
+ "loss": 1.7229,
9623
+ "step": 793000
9624
+ },
9625
+ {
9626
+ "epoch": 11.42,
9627
+ "learning_rate": 1.864014248697937e-05,
9628
+ "loss": 1.7167,
9629
+ "step": 793500
9630
+ },
9631
+ {
9632
+ "epoch": 11.43,
9633
+ "learning_rate": 1.8588215745226638e-05,
9634
+ "loss": 1.7203,
9635
+ "step": 794000
9636
+ },
9637
+ {
9638
+ "epoch": 11.44,
9639
+ "learning_rate": 1.85362890034739e-05,
9640
+ "loss": 1.723,
9641
+ "step": 794500
9642
+ },
9643
+ {
9644
+ "epoch": 11.44,
9645
+ "learning_rate": 1.8484362261721164e-05,
9646
+ "loss": 1.7216,
9647
+ "step": 795000
9648
+ },
9649
+ {
9650
+ "epoch": 11.45,
9651
+ "learning_rate": 1.8432539373451934e-05,
9652
+ "loss": 1.7197,
9653
+ "step": 795500
9654
+ },
9655
+ {
9656
+ "epoch": 11.46,
9657
+ "learning_rate": 1.83806126316992e-05,
9658
+ "loss": 1.7171,
9659
+ "step": 796000
9660
+ },
9661
+ {
9662
+ "epoch": 11.46,
9663
+ "learning_rate": 1.8328685889946464e-05,
9664
+ "loss": 1.7186,
9665
+ "step": 796500
9666
+ },
9667
+ {
9668
+ "epoch": 11.47,
9669
+ "learning_rate": 1.827675914819373e-05,
9670
+ "loss": 1.7188,
9671
+ "step": 797000
9672
+ },
9673
+ {
9674
+ "epoch": 11.48,
9675
+ "learning_rate": 1.82249362599245e-05,
9676
+ "loss": 1.7182,
9677
+ "step": 797500
9678
+ },
9679
+ {
9680
+ "epoch": 11.49,
9681
+ "learning_rate": 1.8173009518171763e-05,
9682
+ "loss": 1.7204,
9683
+ "step": 798000
9684
+ },
9685
+ {
9686
+ "epoch": 11.49,
9687
+ "learning_rate": 1.8121082776419028e-05,
9688
+ "loss": 1.7228,
9689
+ "step": 798500
9690
+ },
9691
+ {
9692
+ "epoch": 11.5,
9693
+ "learning_rate": 1.8069156034666293e-05,
9694
+ "loss": 1.7191,
9695
+ "step": 799000
9696
+ },
9697
+ {
9698
+ "epoch": 11.51,
9699
+ "learning_rate": 1.8017229292913558e-05,
9700
+ "loss": 1.7188,
9701
+ "step": 799500
9702
+ },
9703
+ {
9704
+ "epoch": 11.52,
9705
+ "learning_rate": 1.7965406404644328e-05,
9706
+ "loss": 1.7207,
9707
+ "step": 800000
9708
+ },
9709
+ {
9710
+ "epoch": 11.52,
9711
+ "learning_rate": 1.7913479662891593e-05,
9712
+ "loss": 1.7174,
9713
+ "step": 800500
9714
+ },
9715
+ {
9716
+ "epoch": 11.53,
9717
+ "learning_rate": 1.7861552921138858e-05,
9718
+ "loss": 1.7187,
9719
+ "step": 801000
9720
+ },
9721
+ {
9722
+ "epoch": 11.54,
9723
+ "learning_rate": 1.7809626179386122e-05,
9724
+ "loss": 1.7177,
9725
+ "step": 801500
9726
+ },
9727
+ {
9728
+ "epoch": 11.54,
9729
+ "learning_rate": 1.7757699437633387e-05,
9730
+ "loss": 1.7169,
9731
+ "step": 802000
9732
+ },
9733
+ {
9734
+ "epoch": 11.55,
9735
+ "learning_rate": 1.770587654936416e-05,
9736
+ "loss": 1.7187,
9737
+ "step": 802500
9738
+ },
9739
+ {
9740
+ "epoch": 11.56,
9741
+ "learning_rate": 1.7653949807611422e-05,
9742
+ "loss": 1.7142,
9743
+ "step": 803000
9744
+ },
9745
+ {
9746
+ "epoch": 11.57,
9747
+ "learning_rate": 1.760202306585869e-05,
9748
+ "loss": 1.7166,
9749
+ "step": 803500
9750
+ },
9751
+ {
9752
+ "epoch": 11.57,
9753
+ "learning_rate": 1.755009632410595e-05,
9754
+ "loss": 1.7169,
9755
+ "step": 804000
9756
+ },
9757
+ {
9758
+ "epoch": 11.58,
9759
+ "learning_rate": 1.7498273435836725e-05,
9760
+ "loss": 1.7156,
9761
+ "step": 804500
9762
+ },
9763
+ {
9764
+ "epoch": 11.59,
9765
+ "learning_rate": 1.7446346694083986e-05,
9766
+ "loss": 1.7156,
9767
+ "step": 805000
9768
+ },
9769
+ {
9770
+ "epoch": 11.59,
9771
+ "learning_rate": 1.739441995233125e-05,
9772
+ "loss": 1.7159,
9773
+ "step": 805500
9774
+ },
9775
+ {
9776
+ "epoch": 11.6,
9777
+ "learning_rate": 1.7342493210578516e-05,
9778
+ "loss": 1.717,
9779
+ "step": 806000
9780
+ },
9781
+ {
9782
+ "epoch": 11.61,
9783
+ "learning_rate": 1.729056646882578e-05,
9784
+ "loss": 1.716,
9785
+ "step": 806500
9786
+ },
9787
+ {
9788
+ "epoch": 11.62,
9789
+ "learning_rate": 1.723874358055655e-05,
9790
+ "loss": 1.7217,
9791
+ "step": 807000
9792
+ },
9793
+ {
9794
+ "epoch": 11.62,
9795
+ "learning_rate": 1.7186816838803815e-05,
9796
+ "loss": 1.7168,
9797
+ "step": 807500
9798
+ },
9799
+ {
9800
+ "epoch": 11.63,
9801
+ "learning_rate": 1.713489009705108e-05,
9802
+ "loss": 1.7147,
9803
+ "step": 808000
9804
+ },
9805
+ {
9806
+ "epoch": 11.64,
9807
+ "learning_rate": 1.7082963355298345e-05,
9808
+ "loss": 1.7156,
9809
+ "step": 808500
9810
+ },
9811
+ {
9812
+ "epoch": 11.64,
9813
+ "learning_rate": 1.7031140467029115e-05,
9814
+ "loss": 1.7187,
9815
+ "step": 809000
9816
+ },
9817
+ {
9818
+ "epoch": 11.65,
9819
+ "learning_rate": 1.697921372527638e-05,
9820
+ "loss": 1.7168,
9821
+ "step": 809500
9822
+ },
9823
+ {
9824
+ "epoch": 11.66,
9825
+ "learning_rate": 1.6927286983523648e-05,
9826
+ "loss": 1.7184,
9827
+ "step": 810000
9828
+ },
9829
+ {
9830
+ "epoch": 11.67,
9831
+ "learning_rate": 1.687536024177091e-05,
9832
+ "loss": 1.7142,
9833
+ "step": 810500
9834
+ },
9835
+ {
9836
+ "epoch": 11.67,
9837
+ "learning_rate": 1.6823433500018178e-05,
9838
+ "loss": 1.7171,
9839
+ "step": 811000
9840
+ },
9841
+ {
9842
+ "epoch": 11.68,
9843
+ "learning_rate": 1.6771610611748944e-05,
9844
+ "loss": 1.7163,
9845
+ "step": 811500
9846
+ },
9847
+ {
9848
+ "epoch": 11.69,
9849
+ "learning_rate": 1.6719683869996212e-05,
9850
+ "loss": 1.7208,
9851
+ "step": 812000
9852
+ },
9853
+ {
9854
+ "epoch": 11.7,
9855
+ "learning_rate": 1.6667757128243474e-05,
9856
+ "loss": 1.714,
9857
+ "step": 812500
9858
+ },
9859
+ {
9860
+ "epoch": 11.7,
9861
+ "learning_rate": 1.661583038649074e-05,
9862
+ "loss": 1.7158,
9863
+ "step": 813000
9864
+ },
9865
+ {
9866
+ "epoch": 11.71,
9867
+ "learning_rate": 1.656400749822151e-05,
9868
+ "loss": 1.7167,
9869
+ "step": 813500
9870
+ },
9871
+ {
9872
+ "epoch": 11.72,
9873
+ "learning_rate": 1.6512080756468777e-05,
9874
+ "loss": 1.7153,
9875
+ "step": 814000
9876
+ },
9877
+ {
9878
+ "epoch": 11.72,
9879
+ "learning_rate": 1.6460154014716038e-05,
9880
+ "loss": 1.7164,
9881
+ "step": 814500
9882
+ },
9883
+ {
9884
+ "epoch": 11.73,
9885
+ "learning_rate": 1.6408227272963303e-05,
9886
+ "loss": 1.7186,
9887
+ "step": 815000
9888
+ },
9889
+ {
9890
+ "epoch": 11.74,
9891
+ "learning_rate": 1.6356404384694073e-05,
9892
+ "loss": 1.7179,
9893
+ "step": 815500
9894
+ },
9895
+ {
9896
+ "epoch": 11.75,
9897
+ "learning_rate": 1.630447764294134e-05,
9898
+ "loss": 1.7159,
9899
+ "step": 816000
9900
+ },
9901
+ {
9902
+ "epoch": 11.75,
9903
+ "learning_rate": 1.6252550901188603e-05,
9904
+ "loss": 1.7159,
9905
+ "step": 816500
9906
+ },
9907
+ {
9908
+ "epoch": 11.76,
9909
+ "learning_rate": 1.6200624159435868e-05,
9910
+ "loss": 1.7179,
9911
+ "step": 817000
9912
+ },
9913
+ {
9914
+ "epoch": 11.77,
9915
+ "learning_rate": 1.6148801271166637e-05,
9916
+ "loss": 1.7141,
9917
+ "step": 817500
9918
+ },
9919
+ {
9920
+ "epoch": 11.77,
9921
+ "learning_rate": 1.6096874529413902e-05,
9922
+ "loss": 1.7203,
9923
+ "step": 818000
9924
+ },
9925
+ {
9926
+ "epoch": 11.78,
9927
+ "learning_rate": 1.604494778766117e-05,
9928
+ "loss": 1.7158,
9929
+ "step": 818500
9930
+ },
9931
+ {
9932
+ "epoch": 11.79,
9933
+ "learning_rate": 1.5993021045908432e-05,
9934
+ "loss": 1.7126,
9935
+ "step": 819000
9936
+ },
9937
+ {
9938
+ "epoch": 11.8,
9939
+ "learning_rate": 1.59410943041557e-05,
9940
+ "loss": 1.7146,
9941
+ "step": 819500
9942
+ },
9943
+ {
9944
+ "epoch": 11.8,
9945
+ "learning_rate": 1.5889271415886466e-05,
9946
+ "loss": 1.713,
9947
+ "step": 820000
9948
+ },
9949
+ {
9950
+ "epoch": 11.81,
9951
+ "learning_rate": 1.5837344674133735e-05,
9952
+ "loss": 1.715,
9953
+ "step": 820500
9954
+ },
9955
+ {
9956
+ "epoch": 11.82,
9957
+ "learning_rate": 1.5785417932380996e-05,
9958
+ "loss": 1.7166,
9959
+ "step": 821000
9960
+ },
9961
+ {
9962
+ "epoch": 11.82,
9963
+ "learning_rate": 1.5733491190628264e-05,
9964
+ "loss": 1.7166,
9965
+ "step": 821500
9966
+ },
9967
+ {
9968
+ "epoch": 11.83,
9969
+ "learning_rate": 1.5681564448875526e-05,
9970
+ "loss": 1.7136,
9971
+ "step": 822000
9972
+ },
9973
+ {
9974
+ "epoch": 11.84,
9975
+ "learning_rate": 1.562963770712279e-05,
9976
+ "loss": 1.7152,
9977
+ "step": 822500
9978
+ },
9979
+ {
9980
+ "epoch": 11.85,
9981
+ "learning_rate": 1.557781481885356e-05,
9982
+ "loss": 1.7161,
9983
+ "step": 823000
9984
+ },
9985
+ {
9986
+ "epoch": 11.85,
9987
+ "learning_rate": 1.552588807710083e-05,
9988
+ "loss": 1.7157,
9989
+ "step": 823500
9990
+ },
9991
+ {
9992
+ "epoch": 11.86,
9993
+ "learning_rate": 1.547396133534809e-05,
9994
+ "loss": 1.7162,
9995
+ "step": 824000
9996
+ },
9997
+ {
9998
+ "epoch": 11.87,
9999
+ "learning_rate": 1.5422034593595355e-05,
10000
+ "loss": 1.717,
10001
+ "step": 824500
10002
+ },
10003
+ {
10004
+ "epoch": 11.88,
10005
+ "learning_rate": 1.5370211705326125e-05,
10006
+ "loss": 1.7182,
10007
+ "step": 825000
10008
+ },
10009
+ {
10010
+ "epoch": 11.88,
10011
+ "learning_rate": 1.5318284963573393e-05,
10012
+ "loss": 1.7174,
10013
+ "step": 825500
10014
+ },
10015
+ {
10016
+ "epoch": 11.89,
10017
+ "learning_rate": 1.5266358221820655e-05,
10018
+ "loss": 1.7126,
10019
+ "step": 826000
10020
+ },
10021
+ {
10022
+ "epoch": 11.9,
10023
+ "learning_rate": 1.521443148006792e-05,
10024
+ "loss": 1.7112,
10025
+ "step": 826500
10026
+ },
10027
+ {
10028
+ "epoch": 11.9,
10029
+ "learning_rate": 1.5162608591798691e-05,
10030
+ "loss": 1.7143,
10031
+ "step": 827000
10032
+ },
10033
+ {
10034
+ "epoch": 11.91,
10035
+ "learning_rate": 1.5110681850045954e-05,
10036
+ "loss": 1.7127,
10037
+ "step": 827500
10038
+ },
10039
+ {
10040
+ "epoch": 11.92,
10041
+ "learning_rate": 1.505875510829322e-05,
10042
+ "loss": 1.72,
10043
+ "step": 828000
10044
+ },
10045
+ {
10046
+ "epoch": 11.93,
10047
+ "learning_rate": 1.5006828366540484e-05,
10048
+ "loss": 1.7146,
10049
+ "step": 828500
10050
+ },
10051
+ {
10052
+ "epoch": 11.93,
10053
+ "learning_rate": 1.4955005478271255e-05,
10054
+ "loss": 1.7156,
10055
+ "step": 829000
10056
+ },
10057
+ {
10058
+ "epoch": 11.94,
10059
+ "learning_rate": 1.4903078736518518e-05,
10060
+ "loss": 1.7144,
10061
+ "step": 829500
10062
+ },
10063
+ {
10064
+ "epoch": 11.95,
10065
+ "learning_rate": 1.4851151994765785e-05,
10066
+ "loss": 1.7136,
10067
+ "step": 830000
10068
+ },
10069
+ {
10070
+ "epoch": 11.95,
10071
+ "learning_rate": 1.479922525301305e-05,
10072
+ "loss": 1.713,
10073
+ "step": 830500
10074
+ },
10075
+ {
10076
+ "epoch": 11.96,
10077
+ "learning_rate": 1.474740236474382e-05,
10078
+ "loss": 1.7165,
10079
+ "step": 831000
10080
+ },
10081
+ {
10082
+ "epoch": 11.97,
10083
+ "learning_rate": 1.4695475622991084e-05,
10084
+ "loss": 1.7137,
10085
+ "step": 831500
10086
+ },
10087
+ {
10088
+ "epoch": 11.98,
10089
+ "learning_rate": 1.4643548881238351e-05,
10090
+ "loss": 1.7115,
10091
+ "step": 832000
10092
+ },
10093
+ {
10094
+ "epoch": 11.98,
10095
+ "learning_rate": 1.4591622139485614e-05,
10096
+ "loss": 1.7183,
10097
+ "step": 832500
10098
+ },
10099
+ {
10100
+ "epoch": 11.99,
10101
+ "learning_rate": 1.4539695397732881e-05,
10102
+ "loss": 1.7142,
10103
+ "step": 833000
10104
+ },
10105
+ {
10106
+ "epoch": 12.0,
10107
+ "learning_rate": 1.4487872509463649e-05,
10108
+ "loss": 1.7127,
10109
+ "step": 833500
10110
+ },
10111
+ {
10112
+ "epoch": 12.0,
10113
+ "eval_accuracy": 0.6658040615937678,
10114
+ "eval_loss": 1.5836162567138672,
10115
+ "eval_runtime": 1303.2983,
10116
+ "eval_samples_per_second": 413.518,
10117
+ "eval_steps_per_second": 25.845,
10118
+ "step": 833676
10119
  }
10120
  ],
10121
  "max_steps": 972622,
10122
  "num_train_epochs": 14,
10123
+ "total_flos": 6.311350074383032e+18,
10124
  "trial_name": null,
10125
  "trial_params": null
10126
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd672fa95725c90a4945d6b6f450c71cb23bdb758c11b8bb1978add5c38eb068
3
  size 118243218
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8e789cbff592c7abcbb70a0a1080069b2a642bdd41392da72c8a88aa5f570ae
3
  size 118243218
runs/Feb22_09-35-52_ubuntu-2004/events.out.tfevents.1677026198.ubuntu-2004.1870487.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:940ab48db67deba9e366fcbf4edbc147d99ee2253a760b05ef3d3e32759fcbed
3
- size 251759
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54427ce9472c28a9802ca844bd62a2af3e4bc1d184b1bcde76bbe915e3c41367
3
+ size 274328