RoyJoy committed (verified)
Commit 89aedb0 · 1 Parent(s): 77a3e8c

Training in progress, step 100, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d3aba87a16fa998a552c8b768c644de9d4e0fe4a5f504abcf81b8ed7de15f130
+ oid sha256:5939366f8e0c4bfe7979f5551b49b628228d8ab0e5bf7c4503e74b912e5c8bd7
  size 1521616
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:08b48bca0faa493633e224a1c9caa9e5317594d6feed0fcd550e847fb8b6af24
+ oid sha256:a3a92e30fa822d7cfa2f2461ed09366ad8c4f5720138cf1caee3a6c7173e724f
  size 3108666
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9298a73ccc76bbc3920b4f7e6e9a37f91a568faab038e3125c981e3c50631675
+ oid sha256:f4c4c3ec5df6eba4c05a06b4188110a85b051b2b13e1681cb16e463ef78abe17
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:48d491134409fdc628dfe027758d4ad26a55004194677ae851cf5852a1134fef
+ oid sha256:cbc53838d71d7ffbc88192ec7303d7e6bbca69387969f4dd64884419c5fd8735
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:37cc9c666ac1f34c1437039218f87639ea692b30fde543b8aabbdf7af6de7497
+ oid sha256:d38ca30957f1bae8c2f8be41072eb1e4564215c1b03164f3762535e35c2f63d5
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:3eaf07b326c1c98a0991053eed2afb3a6ffd2ce0bf53983a39d5c68c262119ed
+ oid sha256:bfa6812679ba3104c6d0c851d5e9ffb6d9a1a191ceb230d4b225523e0da3d8aa
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:af8bec7b0e7e041be94e0aec289bc9cb45a2194eb6c5ff3bec5b9654ffb56253
+ oid sha256:4104d3ca8fe9069127dc26e97c51fd3e36819b64c5c1d64d2ff7604a3c21107e
  size 1064
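
Note: each binary file above is tracked with Git LFS, so the diff only records a new sha256 oid and byte size while the blob itself is stored out of band. A minimal sketch of checking a locally downloaded copy against the pointer values shown in this commit (the local path is illustrative, not part of the commit):

import hashlib
from pathlib import Path

def verify_lfs_object(path: str, expected_oid: str, expected_size: int) -> bool:
    """Return True if the file matches the pointer's sha256 oid and byte size."""
    data = Path(path).read_bytes()
    return hashlib.sha256(data).hexdigest() == expected_oid and len(data) == expected_size

# Example with the new adapter weights from this commit (path assumed local):
ok = verify_lfs_object(
    "last-checkpoint/adapter_model.safetensors",
    "5939366f8e0c4bfe7979f5551b49b628228d8ab0e5bf7c4503e74b912e5c8bd7",
    1521616,
)
print("adapter_model.safetensors matches its LFS pointer:", ok)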
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
- "best_metric": 2.095752477645874,
- "best_model_checkpoint": "miner_id_24/checkpoint-50",
- "epoch": 0.9213936078318457,
+ "best_metric": 1.994510531425476,
+ "best_model_checkpoint": "miner_id_24/checkpoint-100",
+ "epoch": 1.8465303771955082,
  "eval_steps": 25,
- "global_step": 50,
+ "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -381,6 +381,372 @@
  "eval_samples_per_second": 254.354,
  "eval_steps_per_second": 66.132,
  "step": 50
+ },
+ {
+ "epoch": 0.9398214799884825,
+ "grad_norm": 3.3888142108917236,
+ "learning_rate": 8.295635440954695e-05,
+ "loss": 2.2467,
+ "step": 51
+ },
+ {
+ "epoch": 0.9582493521451195,
+ "grad_norm": 1.6675165891647339,
+ "learning_rate": 8.224519749672376e-05,
+ "loss": 2.1031,
+ "step": 52
+ },
+ {
+ "epoch": 0.9766772243017564,
+ "grad_norm": 1.0358479022979736,
+ "learning_rate": 8.152313181077242e-05,
+ "loss": 2.0694,
+ "step": 53
+ },
+ {
+ "epoch": 0.9951050964583933,
+ "grad_norm": 1.0793170928955078,
+ "learning_rate": 8.079044646138837e-05,
+ "loss": 2.0398,
+ "step": 54
+ },
+ {
+ "epoch": 1.0172761301468471,
+ "grad_norm": 5.010680675506592,
+ "learning_rate": 8.004743481030088e-05,
+ "loss": 4.036,
+ "step": 55
+ },
+ {
+ "epoch": 1.035704002303484,
+ "grad_norm": 3.263810873031616,
+ "learning_rate": 7.929439435381305e-05,
+ "loss": 2.2462,
+ "step": 56
+ },
+ {
+ "epoch": 1.054131874460121,
+ "grad_norm": 2.19437837600708,
+ "learning_rate": 7.853162660368662e-05,
+ "loss": 2.1076,
+ "step": 57
+ },
+ {
+ "epoch": 1.0725597466167578,
+ "grad_norm": 1.1197891235351562,
+ "learning_rate": 7.775943696641888e-05,
+ "loss": 2.0394,
+ "step": 58
+ },
+ {
+ "epoch": 1.0909876187733947,
+ "grad_norm": 1.35910964012146,
+ "learning_rate": 7.697813462096025e-05,
+ "loss": 2.0473,
+ "step": 59
+ },
+ {
+ "epoch": 1.1094154909300316,
+ "grad_norm": 1.9377872943878174,
+ "learning_rate": 7.618803239492121e-05,
+ "loss": 2.0849,
+ "step": 60
+ },
+ {
+ "epoch": 1.1278433630866687,
+ "grad_norm": 1.510143756866455,
+ "learning_rate": 7.538944663931862e-05,
+ "loss": 2.0566,
+ "step": 61
+ },
+ {
+ "epoch": 1.1462712352433055,
+ "grad_norm": 2.0934293270111084,
+ "learning_rate": 7.458269710191101e-05,
+ "loss": 1.9738,
+ "step": 62
+ },
+ {
+ "epoch": 1.1646991073999424,
+ "grad_norm": 0.8613946437835693,
+ "learning_rate": 7.376810679917411e-05,
+ "loss": 2.0698,
+ "step": 63
+ },
+ {
+ "epoch": 1.1831269795565793,
+ "grad_norm": 0.933632493019104,
+ "learning_rate": 7.294600188696732e-05,
+ "loss": 2.0709,
+ "step": 64
+ },
+ {
+ "epoch": 1.2015548517132162,
+ "grad_norm": 1.4427233934402466,
+ "learning_rate": 7.211671152994348e-05,
+ "loss": 1.9954,
+ "step": 65
+ },
+ {
+ "epoch": 1.219982723869853,
+ "grad_norm": 2.0800139904022217,
+ "learning_rate": 7.128056776975369e-05,
+ "loss": 1.9794,
+ "step": 66
+ },
+ {
+ "epoch": 1.23841059602649,
+ "grad_norm": 1.3812874555587769,
+ "learning_rate": 7.043790539210045e-05,
+ "loss": 2.0207,
+ "step": 67
+ },
+ {
+ "epoch": 1.256838468183127,
+ "grad_norm": 1.9489840269088745,
+ "learning_rate": 6.95890617926918e-05,
+ "loss": 2.0679,
+ "step": 68
+ },
+ {
+ "epoch": 1.275266340339764,
+ "grad_norm": 1.545703411102295,
+ "learning_rate": 6.873437684215077e-05,
+ "loss": 2.0753,
+ "step": 69
+ },
+ {
+ "epoch": 1.2936942124964008,
+ "grad_norm": 1.9554816484451294,
+ "learning_rate": 6.787419274993366e-05,
+ "loss": 2.0324,
+ "step": 70
+ },
+ {
+ "epoch": 1.3121220846530377,
+ "grad_norm": 2.3300676345825195,
+ "learning_rate": 6.700885392731187e-05,
+ "loss": 2.0034,
+ "step": 71
+ },
+ {
+ "epoch": 1.3305499568096746,
+ "grad_norm": 1.7361541986465454,
+ "learning_rate": 6.613870684947231e-05,
+ "loss": 2.0202,
+ "step": 72
+ },
+ {
+ "epoch": 1.3489778289663115,
+ "grad_norm": 1.2443159818649292,
+ "learning_rate": 6.526409991679134e-05,
+ "loss": 1.9933,
+ "step": 73
+ },
+ {
+ "epoch": 1.3674057011229483,
+ "grad_norm": 0.6612122654914856,
+ "learning_rate": 6.438538331533768e-05,
+ "loss": 1.9769,
+ "step": 74
+ },
+ {
+ "epoch": 1.3858335732795855,
+ "grad_norm": 0.6065022945404053,
+ "learning_rate": 6.350290887666078e-05,
+ "loss": 2.0084,
+ "step": 75
+ },
+ {
+ "epoch": 1.3858335732795855,
+ "eval_loss": 2.0034306049346924,
+ "eval_runtime": 0.1919,
+ "eval_samples_per_second": 260.533,
+ "eval_steps_per_second": 67.739,
+ "step": 75
+ },
+ {
+ "epoch": 1.4042614454362223,
+ "grad_norm": 0.7181031107902527,
+ "learning_rate": 6.261702993691994e-05,
+ "loss": 2.0097,
+ "step": 76
+ },
+ {
+ "epoch": 1.4226893175928592,
+ "grad_norm": 1.1980915069580078,
+ "learning_rate": 6.172810119541118e-05,
+ "loss": 2.0081,
+ "step": 77
+ },
+ {
+ "epoch": 1.441117189749496,
+ "grad_norm": 2.185781478881836,
+ "learning_rate": 6.083647857254837e-05,
+ "loss": 1.9817,
+ "step": 78
+ },
+ {
+ "epoch": 1.459545061906133,
+ "grad_norm": 0.7530511021614075,
+ "learning_rate": 5.9942519067355284e-05,
+ "loss": 1.9986,
+ "step": 79
+ },
+ {
+ "epoch": 1.4779729340627699,
+ "grad_norm": 1.7427482604980469,
+ "learning_rate": 5.904658061452585e-05,
+ "loss": 2.0496,
+ "step": 80
+ },
+ {
+ "epoch": 1.4964008062194067,
+ "grad_norm": 2.031916618347168,
+ "learning_rate": 5.814902194110988e-05,
+ "loss": 2.0361,
+ "step": 81
+ },
+ {
+ "epoch": 1.5148286783760438,
+ "grad_norm": 1.6156734228134155,
+ "learning_rate": 5.7250202422881336e-05,
+ "loss": 2.0416,
+ "step": 82
+ },
+ {
+ "epoch": 1.5332565505326807,
+ "grad_norm": 0.9678031206130981,
+ "learning_rate": 5.635048194044702e-05,
+ "loss": 2.0342,
+ "step": 83
+ },
+ {
+ "epoch": 1.5516844226893176,
+ "grad_norm": 0.4935534596443176,
+ "learning_rate": 5.5450220735153056e-05,
+ "loss": 1.9647,
+ "step": 84
+ },
+ {
+ "epoch": 1.5701122948459545,
+ "grad_norm": 1.1301989555358887,
+ "learning_rate": 5.4549779264846955e-05,
+ "loss": 1.9986,
+ "step": 85
+ },
+ {
+ "epoch": 1.5885401670025914,
+ "grad_norm": 2.0241589546203613,
+ "learning_rate": 5.3649518059552994e-05,
+ "loss": 2.0601,
+ "step": 86
+ },
+ {
+ "epoch": 1.6069680391592285,
+ "grad_norm": 1.204086184501648,
+ "learning_rate": 5.2749797577118675e-05,
+ "loss": 1.9727,
+ "step": 87
+ },
+ {
+ "epoch": 1.6253959113158651,
+ "grad_norm": 1.2840675115585327,
+ "learning_rate": 5.185097805889013e-05,
+ "loss": 2.0326,
+ "step": 88
+ },
+ {
+ "epoch": 1.6438237834725022,
+ "grad_norm": 1.003303050994873,
+ "learning_rate": 5.0953419385474155e-05,
+ "loss": 1.9445,
+ "step": 89
+ },
+ {
+ "epoch": 1.6622516556291391,
+ "grad_norm": 0.5838247537612915,
+ "learning_rate": 5.005748093264473e-05,
+ "loss": 1.9922,
+ "step": 90
+ },
+ {
+ "epoch": 1.680679527785776,
+ "grad_norm": 0.7593981623649597,
+ "learning_rate": 4.916352142745163e-05,
+ "loss": 1.966,
+ "step": 91
+ },
+ {
+ "epoch": 1.6991073999424129,
+ "grad_norm": 0.8177780508995056,
+ "learning_rate": 4.827189880458882e-05,
+ "loss": 1.9918,
+ "step": 92
+ },
+ {
+ "epoch": 1.7175352720990498,
+ "grad_norm": 0.8616487979888916,
+ "learning_rate": 4.7382970063080076e-05,
+ "loss": 2.0279,
+ "step": 93
+ },
+ {
+ "epoch": 1.7359631442556869,
+ "grad_norm": 1.1605690717697144,
+ "learning_rate": 4.649709112333923e-05,
+ "loss": 2.0104,
+ "step": 94
+ },
+ {
+ "epoch": 1.7543910164123235,
+ "grad_norm": 1.9952294826507568,
+ "learning_rate": 4.561461668466233e-05,
+ "loss": 1.9628,
+ "step": 95
+ },
+ {
+ "epoch": 1.7728188885689606,
+ "grad_norm": 1.4495019912719727,
+ "learning_rate": 4.473590008320868e-05,
+ "loss": 2.0058,
+ "step": 96
+ },
+ {
+ "epoch": 1.7912467607255975,
+ "grad_norm": 1.114969253540039,
+ "learning_rate": 4.386129315052768e-05,
+ "loss": 2.0131,
+ "step": 97
+ },
+ {
+ "epoch": 1.8096746328822344,
+ "grad_norm": 0.8008613586425781,
+ "learning_rate": 4.299114607268814e-05,
+ "loss": 1.9947,
+ "step": 98
+ },
+ {
+ "epoch": 1.8281025050388713,
+ "grad_norm": 0.7169589400291443,
+ "learning_rate": 4.2125807250066354e-05,
+ "loss": 1.9454,
+ "step": 99
+ },
+ {
+ "epoch": 1.8465303771955082,
+ "grad_norm": 1.009277582168579,
+ "learning_rate": 4.1265623157849235e-05,
+ "loss": 2.0115,
+ "step": 100
+ },
+ {
+ "epoch": 1.8465303771955082,
+ "eval_loss": 1.994510531425476,
+ "eval_runtime": 0.192,
+ "eval_samples_per_second": 260.351,
+ "eval_steps_per_second": 67.691,
+ "step": 100
  }
  ],
  "logging_steps": 1,
@@ -409,7 +775,7 @@
  "attributes": {}
  }
  },
- "total_flos": 464208857661440.0,
+ "total_flos": 928417715322880.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null