ZeroUniqueness committed on
Commit
8e479b2
•
1 Parent(s): 78ce51e

Training in progress, step 40000

Browse files
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ac02c8b60ff2af2043c8bbd7012fcc6335dd99c02ab7c2e9f0bff405da9b9af
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b58822fe1e8262161f44bc61bc0ad0a9e7afd47e4892189dfa46e0a57e6dee1
3
  size 500897101
{checkpoint-36000 → checkpoint-39000/adapter_model}/README.md RENAMED
File without changes
{checkpoint-36000 → checkpoint-39000/adapter_model}/adapter_config.json RENAMED
File without changes
{checkpoint-36000 → checkpoint-39000/adapter_model}/adapter_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:23c2d32c288feead4318a69d3ff414e535f97637bea0a9ae28d2ce95cd3de348
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ac02c8b60ff2af2043c8bbd7012fcc6335dd99c02ab7c2e9f0bff405da9b9af
3
  size 500897101
{checkpoint-36000/adapter_model → checkpoint-40000}/README.md RENAMED
File without changes
{checkpoint-36000/adapter_model → checkpoint-40000}/adapter_config.json RENAMED
File without changes
{checkpoint-36000/adapter_model → checkpoint-40000}/adapter_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:23c2d32c288feead4318a69d3ff414e535f97637bea0a9ae28d2ce95cd3de348
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b58822fe1e8262161f44bc61bc0ad0a9e7afd47e4892189dfa46e0a57e6dee1
3
  size 500897101
{checkpoint-36000 → checkpoint-40000}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0c0e70ef55de1c6cbc98cd075c596e3c9cd2b094500bf8236882d502fa6ca5b9
3
  size 1001723453
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd8245362ab2a8b70d0dc9675794736e143bd184a4de380caa12617c8833da7b
3
  size 1001723453
{checkpoint-36000 → checkpoint-40000}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b82ab6c9b5a2316a2519ca0cee67ceeb9c6adae29a4e09565a77cf2bd0f34867
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c27ba48931454fb9cea28943695a625f9f93123a1cb450835a0e8e0f21bcf26
3
  size 14575
{checkpoint-36000 → checkpoint-40000}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9fee39e6258450748b3bb44b98b7db3fb15c683df3b6ec39e41a963424526f75
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c82428b9badf946a212907d36a6d1a7db1b15e77a0dbc7be8d578794535936cc
3
  size 627
{checkpoint-36000 → checkpoint-40000}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 0.5594063997268677,
3
- "best_model_checkpoint": "./qlora-out/checkpoint-36000",
4
- "epoch": 1.342231833264979,
5
- "global_step": 36000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -2454,11 +2454,283 @@
2454
  "eval_samples_per_second": 0.427,
2455
  "eval_steps_per_second": 0.427,
2456
  "step": 36000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2457
  }
2458
  ],
2459
  "max_steps": 80463,
2460
  "num_train_epochs": 3,
2461
- "total_flos": 1.009532895096152e+19,
2462
  "trial_name": null,
2463
  "trial_params": null
2464
  }
 
1
  {
2
+ "best_metric": 0.5438262820243835,
3
+ "best_model_checkpoint": "./qlora-out/checkpoint-40000",
4
+ "epoch": 1.4913687036277543,
5
+ "global_step": 40000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
2454
  "eval_samples_per_second": 0.427,
2455
  "eval_steps_per_second": 0.427,
2456
  "step": 36000
2457
+ },
2458
+ {
2459
+ "epoch": 1.35,
2460
+ "learning_rate": 0.00011608238759007561,
2461
+ "loss": 0.5268,
2462
+ "step": 36100
2463
+ },
2464
+ {
2465
+ "epoch": 1.35,
2466
+ "learning_rate": 0.00011569686094752101,
2467
+ "loss": 0.5179,
2468
+ "step": 36200
2469
+ },
2470
+ {
2471
+ "epoch": 1.35,
2472
+ "learning_rate": 0.00011531109495822545,
2473
+ "loss": 0.5236,
2474
+ "step": 36300
2475
+ },
2476
+ {
2477
+ "epoch": 1.36,
2478
+ "learning_rate": 0.00011492509550437339,
2479
+ "loss": 0.5197,
2480
+ "step": 36400
2481
+ },
2482
+ {
2483
+ "epoch": 1.36,
2484
+ "learning_rate": 0.0001145388684717092,
2485
+ "loss": 0.5109,
2486
+ "step": 36500
2487
+ },
2488
+ {
2489
+ "epoch": 1.36,
2490
+ "learning_rate": 0.00011415241974944744,
2491
+ "loss": 0.5126,
2492
+ "step": 36600
2493
+ },
2494
+ {
2495
+ "epoch": 1.37,
2496
+ "learning_rate": 0.00011376575523018296,
2497
+ "loss": 0.501,
2498
+ "step": 36700
2499
+ },
2500
+ {
2501
+ "epoch": 1.37,
2502
+ "learning_rate": 0.00011337888080980115,
2503
+ "loss": 0.4888,
2504
+ "step": 36800
2505
+ },
2506
+ {
2507
+ "epoch": 1.38,
2508
+ "learning_rate": 0.00011299180238738789,
2509
+ "loss": 0.5324,
2510
+ "step": 36900
2511
+ },
2512
+ {
2513
+ "epoch": 1.38,
2514
+ "learning_rate": 0.00011260452586513981,
2515
+ "loss": 0.5053,
2516
+ "step": 37000
2517
+ },
2518
+ {
2519
+ "epoch": 1.38,
2520
+ "eval_loss": 0.5555862188339233,
2521
+ "eval_runtime": 1324.5375,
2522
+ "eval_samples_per_second": 0.409,
2523
+ "eval_steps_per_second": 0.409,
2524
+ "step": 37000
2525
+ },
2526
+ {
2527
+ "epoch": 1.38,
2528
+ "learning_rate": 0.00011221705714827408,
2529
+ "loss": 0.4994,
2530
+ "step": 37100
2531
+ },
2532
+ {
2533
+ "epoch": 1.39,
2534
+ "learning_rate": 0.00011182940214493858,
2535
+ "loss": 0.4826,
2536
+ "step": 37200
2537
+ },
2538
+ {
2539
+ "epoch": 1.39,
2540
+ "learning_rate": 0.0001114415667661215,
2541
+ "loss": 0.5173,
2542
+ "step": 37300
2543
+ },
2544
+ {
2545
+ "epoch": 1.39,
2546
+ "learning_rate": 0.00011105355692556165,
2547
+ "loss": 0.4992,
2548
+ "step": 37400
2549
+ },
2550
+ {
2551
+ "epoch": 1.4,
2552
+ "learning_rate": 0.00011066537853965788,
2553
+ "loss": 0.5155,
2554
+ "step": 37500
2555
+ },
2556
+ {
2557
+ "epoch": 1.4,
2558
+ "learning_rate": 0.00011027703752737913,
2559
+ "loss": 0.5118,
2560
+ "step": 37600
2561
+ },
2562
+ {
2563
+ "epoch": 1.41,
2564
+ "learning_rate": 0.00010988853981017393,
2565
+ "loss": 0.5078,
2566
+ "step": 37700
2567
+ },
2568
+ {
2569
+ "epoch": 1.41,
2570
+ "learning_rate": 0.00010949989131188043,
2571
+ "loss": 0.506,
2572
+ "step": 37800
2573
+ },
2574
+ {
2575
+ "epoch": 1.41,
2576
+ "learning_rate": 0.00010911109795863581,
2577
+ "loss": 0.5074,
2578
+ "step": 37900
2579
+ },
2580
+ {
2581
+ "epoch": 1.42,
2582
+ "learning_rate": 0.00010872216567878599,
2583
+ "loss": 0.4837,
2584
+ "step": 38000
2585
+ },
2586
+ {
2587
+ "epoch": 1.42,
2588
+ "eval_loss": 0.5529844164848328,
2589
+ "eval_runtime": 1281.6978,
2590
+ "eval_samples_per_second": 0.423,
2591
+ "eval_steps_per_second": 0.423,
2592
+ "step": 38000
2593
+ },
2594
+ {
2595
+ "epoch": 1.42,
2596
+ "learning_rate": 0.00010833310040279531,
2597
+ "loss": 0.5161,
2598
+ "step": 38100
2599
+ },
2600
+ {
2601
+ "epoch": 1.42,
2602
+ "learning_rate": 0.00010794390806315602,
2603
+ "loss": 0.4929,
2604
+ "step": 38200
2605
+ },
2606
+ {
2607
+ "epoch": 1.43,
2608
+ "learning_rate": 0.0001075545945942978,
2609
+ "loss": 0.4819,
2610
+ "step": 38300
2611
+ },
2612
+ {
2613
+ "epoch": 1.43,
2614
+ "learning_rate": 0.00010716516593249742,
2615
+ "loss": 0.4594,
2616
+ "step": 38400
2617
+ },
2618
+ {
2619
+ "epoch": 1.44,
2620
+ "learning_rate": 0.00010677562801578798,
2621
+ "loss": 0.4802,
2622
+ "step": 38500
2623
+ },
2624
+ {
2625
+ "epoch": 1.44,
2626
+ "learning_rate": 0.00010638598678386864,
2627
+ "loss": 0.4793,
2628
+ "step": 38600
2629
+ },
2630
+ {
2631
+ "epoch": 1.44,
2632
+ "learning_rate": 0.00010599624817801383,
2633
+ "loss": 0.4912,
2634
+ "step": 38700
2635
+ },
2636
+ {
2637
+ "epoch": 1.45,
2638
+ "learning_rate": 0.0001056064181409828,
2639
+ "loss": 0.519,
2640
+ "step": 38800
2641
+ },
2642
+ {
2643
+ "epoch": 1.45,
2644
+ "learning_rate": 0.00010521650261692886,
2645
+ "loss": 0.4993,
2646
+ "step": 38900
2647
+ },
2648
+ {
2649
+ "epoch": 1.45,
2650
+ "learning_rate": 0.00010482650755130898,
2651
+ "loss": 0.4688,
2652
+ "step": 39000
2653
+ },
2654
+ {
2655
+ "epoch": 1.45,
2656
+ "eval_loss": 0.5486682057380676,
2657
+ "eval_runtime": 1265.4476,
2658
+ "eval_samples_per_second": 0.428,
2659
+ "eval_steps_per_second": 0.428,
2660
+ "step": 39000
2661
+ },
2662
+ {
2663
+ "epoch": 1.46,
2664
+ "learning_rate": 0.00010443643889079282,
2665
+ "loss": 0.4901,
2666
+ "step": 39100
2667
+ },
2668
+ {
2669
+ "epoch": 1.46,
2670
+ "learning_rate": 0.00010404630258317236,
2671
+ "loss": 0.4783,
2672
+ "step": 39200
2673
+ },
2674
+ {
2675
+ "epoch": 1.47,
2676
+ "learning_rate": 0.00010365610457727095,
2677
+ "loss": 0.5026,
2678
+ "step": 39300
2679
+ },
2680
+ {
2681
+ "epoch": 1.47,
2682
+ "learning_rate": 0.00010326585082285279,
2683
+ "loss": 0.4602,
2684
+ "step": 39400
2685
+ },
2686
+ {
2687
+ "epoch": 1.47,
2688
+ "learning_rate": 0.00010287554727053215,
2689
+ "loss": 0.5299,
2690
+ "step": 39500
2691
+ },
2692
+ {
2693
+ "epoch": 1.48,
2694
+ "learning_rate": 0.00010248519987168252,
2695
+ "loss": 0.5135,
2696
+ "step": 39600
2697
+ },
2698
+ {
2699
+ "epoch": 1.48,
2700
+ "learning_rate": 0.00010209481457834616,
2701
+ "loss": 0.4792,
2702
+ "step": 39700
2703
+ },
2704
+ {
2705
+ "epoch": 1.48,
2706
+ "learning_rate": 0.0001017043973431429,
2707
+ "loss": 0.5004,
2708
+ "step": 39800
2709
+ },
2710
+ {
2711
+ "epoch": 1.49,
2712
+ "learning_rate": 0.00010131395411917979,
2713
+ "loss": 0.5013,
2714
+ "step": 39900
2715
+ },
2716
+ {
2717
+ "epoch": 1.49,
2718
+ "learning_rate": 0.00010092349085996011,
2719
+ "loss": 0.501,
2720
+ "step": 40000
2721
+ },
2722
+ {
2723
+ "epoch": 1.49,
2724
+ "eval_loss": 0.5438262820243835,
2725
+ "eval_runtime": 1262.0322,
2726
+ "eval_samples_per_second": 0.429,
2727
+ "eval_steps_per_second": 0.429,
2728
+ "step": 40000
2729
  }
2730
  ],
2731
  "max_steps": 80463,
2732
  "num_train_epochs": 3,
2733
+ "total_flos": 1.1217846349451428e+19,
2734
  "trial_name": null,
2735
  "trial_params": null
2736
  }
{checkpoint-36000 → checkpoint-40000}/training_args.bin RENAMED
File without changes