Nexspear committed
Commit 16b1684 · verified · 1 Parent(s): f1abeb5

Training in progress, step 361, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:097d4fdc64d9ee31a96db161db9847ba9166f14e21a958dcd74c46699dc78bed
+oid sha256:e67a979db55d6147b4bb468cc931d0dc8300c8fa1ab3c13b8c292d544c3bd1e3
 size 156926880
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:599ab671ddb516283dd476bb8dab0a2d13c74dd97a085f6c7370357fb354d6db
+oid sha256:9d339f91f68c41787265c459e58cdd100e6584ccd25d3117f3c5009d953ffe0e
 size 79968964
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:da88a4b66e5e83e157425af0894871d0d630078858be2e7385a95662ae93763b
+oid sha256:7f80ea97359604fbd8013d8795dac270f4675ca2d0b7dc0a23dab94f1eb1a2d7
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c6e032ea5899f5eb25699f91880bee459f3b4e6bc3c26d1b136de1a7fd506249
+oid sha256:b857be7b95ff8324c4727de3c0f481a268cea8c6e2533b10d776846f18e23993
 size 1064
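
The four checkpoint binaries above are tracked with Git LFS, so the repository stores only a pointer per file (spec version, `oid sha256:` digest, byte size); this commit swaps each digest while the sizes stay unchanged. A minimal sketch for checking a downloaded file against its pointer, assuming the files have been pulled into a local last-checkpoint/ directory (the path and helper name are illustrative):

import hashlib

def sha256_of(path, chunk_size=1 << 20):
    # Stream the file so large checkpoints do not need to fit in memory.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# New oid for last-checkpoint/adapter_model.safetensors from the diff above.
expected = "e67a979db55d6147b4bb468cc931d0dc8300c8fa1ab3c13b8c292d544c3bd1e3"
actual = sha256_of("last-checkpoint/adapter_model.safetensors")  # assumed local path
print("match" if actual == expected else "mismatch")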
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 0.32943063974380493,
   "best_model_checkpoint": "miner_id_24/checkpoint-350",
-  "epoch": 2.908713692946058,
+  "epoch": 3.0020746887966805,
   "eval_steps": 50,
-  "global_step": 350,
+  "global_step": 361,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2521,6 +2521,83 @@
       "eval_samples_per_second": 28.433,
       "eval_steps_per_second": 7.143,
       "step": 350
+    },
+    {
+      "epoch": 2.91701244813278,
+      "grad_norm": 1.8393205404281616,
+      "learning_rate": 2.0014077392525031e-07,
+      "loss": 0.0879,
+      "step": 351
+    },
+    {
+      "epoch": 2.9253112033195023,
+      "grad_norm": 1.0853371620178223,
+      "learning_rate": 1.6213459328950352e-07,
+      "loss": 0.0175,
+      "step": 352
+    },
+    {
+      "epoch": 2.9336099585062243,
+      "grad_norm": 1.8038127422332764,
+      "learning_rate": 1.281208861894201e-07,
+      "loss": 0.0201,
+      "step": 353
+    },
+    {
+      "epoch": 2.9419087136929463,
+      "grad_norm": 0.9513048529624939,
+      "learning_rate": 9.810237743724803e-08,
+      "loss": 0.0115,
+      "step": 354
+    },
+    {
+      "epoch": 2.9502074688796682,
+      "grad_norm": 0.7663481831550598,
+      "learning_rate": 7.208147179291192e-08,
+      "loss": 0.0143,
+      "step": 355
+    },
+    {
+      "epoch": 2.95850622406639,
+      "grad_norm": 1.1975268125534058,
+      "learning_rate": 5.006025377138901e-08,
+      "loss": 0.0185,
+      "step": 356
+    },
+    {
+      "epoch": 2.966804979253112,
+      "grad_norm": 1.4600636959075928,
+      "learning_rate": 3.2040487475731854e-08,
+      "loss": 0.0263,
+      "step": 357
+    },
+    {
+      "epoch": 2.975103734439834,
+      "grad_norm": 1.718473196029663,
+      "learning_rate": 1.802361645573125e-08,
+      "loss": 0.0435,
+      "step": 358
+    },
+    {
+      "epoch": 2.983402489626556,
+      "grad_norm": 1.1524897813796997,
+      "learning_rate": 8.010763592264381e-09,
+      "loss": 0.024,
+      "step": 359
+    },
+    {
+      "epoch": 2.991701244813278,
+      "grad_norm": 5.361166954040527,
+      "learning_rate": 2.0027310073833518e-09,
+      "loss": 0.0739,
+      "step": 360
+    },
+    {
+      "epoch": 3.0020746887966805,
+      "grad_norm": 4.823180675506592,
+      "learning_rate": 0.0,
+      "loss": 0.1414,
+      "step": 361
     }
   ],
   "logging_steps": 1,
@@ -2544,12 +2621,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.390780807643136e+17,
+  "total_flos": 1.4344910615976346e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null