ZeroUniqueness committed on
Commit
1823816
•
1 Parent(s): f967b08

almost donezies

Browse files
Files changed (49) hide show
  1. adapter_model.bin +1 -1
  2. checkpoint-70000/adapter_model.bin +0 -3
  3. checkpoint-70000/adapter_model/adapter_model.bin +0 -3
  4. checkpoint-70500/adapter_model.bin +0 -3
  5. checkpoint-70500/adapter_model/adapter_model.bin +0 -3
  6. {checkpoint-69000 → checkpoint-76500}/README.md +0 -0
  7. {checkpoint-69000 → checkpoint-76500}/adapter_config.json +0 -0
  8. {checkpoint-69000 → checkpoint-76500}/adapter_model.bin +1 -1
  9. {checkpoint-69000 → checkpoint-76500}/adapter_model/README.md +0 -0
  10. {checkpoint-69000 → checkpoint-76500}/adapter_model/adapter_config.json +0 -0
  11. {checkpoint-69000 → checkpoint-76500}/adapter_model/adapter_model.bin +1 -1
  12. {checkpoint-69500 → checkpoint-76500}/optimizer.pt +1 -1
  13. {checkpoint-69500 → checkpoint-76500}/rng_state.pth +1 -1
  14. {checkpoint-70000 → checkpoint-76500}/scheduler.pt +1 -1
  15. {checkpoint-70500 → checkpoint-76500}/trainer_state.json +173 -5
  16. {checkpoint-69000 → checkpoint-76500}/training_args.bin +0 -0
  17. {checkpoint-69500 → checkpoint-77000}/README.md +0 -0
  18. {checkpoint-69500 → checkpoint-77000}/adapter_config.json +0 -0
  19. {checkpoint-69500 → checkpoint-77000}/adapter_model.bin +1 -1
  20. {checkpoint-69500 → checkpoint-77000}/adapter_model/README.md +0 -0
  21. {checkpoint-69500 → checkpoint-77000}/adapter_model/adapter_config.json +0 -0
  22. {checkpoint-69500 → checkpoint-77000}/adapter_model/adapter_model.bin +1 -1
  23. {checkpoint-70000 → checkpoint-77000}/optimizer.pt +1 -1
  24. {checkpoint-70000 → checkpoint-77000}/rng_state.pth +1 -1
  25. {checkpoint-69500 → checkpoint-77000}/scheduler.pt +1 -1
  26. {checkpoint-70000 → checkpoint-77000}/trainer_state.json +201 -5
  27. {checkpoint-69500 → checkpoint-77000}/training_args.bin +0 -0
  28. {checkpoint-70000 → checkpoint-77500}/README.md +0 -0
  29. {checkpoint-70000 → checkpoint-77500}/adapter_config.json +0 -0
  30. checkpoint-77500/adapter_model.bin +3 -0
  31. {checkpoint-70000 → checkpoint-77500}/adapter_model/README.md +0 -0
  32. {checkpoint-70000 → checkpoint-77500}/adapter_model/adapter_config.json +0 -0
  33. checkpoint-77500/adapter_model/adapter_model.bin +3 -0
  34. {checkpoint-69000 → checkpoint-77500}/optimizer.pt +1 -1
  35. {checkpoint-70500 → checkpoint-77500}/rng_state.pth +1 -1
  36. {checkpoint-69000 → checkpoint-77500}/scheduler.pt +1 -1
  37. {checkpoint-69500 → checkpoint-77500}/trainer_state.json +229 -5
  38. {checkpoint-70000 → checkpoint-77500}/training_args.bin +0 -0
  39. {checkpoint-70500 → checkpoint-78000}/README.md +0 -0
  40. {checkpoint-70500 → checkpoint-78000}/adapter_config.json +0 -0
  41. checkpoint-78000/adapter_model.bin +3 -0
  42. {checkpoint-70500 → checkpoint-78000}/adapter_model/README.md +0 -0
  43. {checkpoint-70500 → checkpoint-78000}/adapter_model/adapter_config.json +0 -0
  44. checkpoint-78000/adapter_model/adapter_model.bin +3 -0
  45. {checkpoint-70500 → checkpoint-78000}/optimizer.pt +1 -1
  46. {checkpoint-69000 → checkpoint-78000}/rng_state.pth +1 -1
  47. {checkpoint-70500 → checkpoint-78000}/scheduler.pt +1 -1
  48. {checkpoint-69000 → checkpoint-78000}/trainer_state.json +257 -5
  49. {checkpoint-70500 → checkpoint-78000}/training_args.bin +0 -0
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a26259b6c7f10eacd37169a51779a24aa9d6a76d8fdef027422bdcbf2557c2f
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a95e49326f95cbf72dea87596b1fd0bcc1abcccd06310612c0ac6bdb3314bc1
3
  size 500897101
checkpoint-70000/adapter_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:35b27172603bfaa42af020910d0f3a0724656396738e74f39eebef1c4c53cd6c
3
- size 500897101
 
 
 
 
checkpoint-70000/adapter_model/adapter_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:35b27172603bfaa42af020910d0f3a0724656396738e74f39eebef1c4c53cd6c
3
- size 500897101
 
 
 
 
checkpoint-70500/adapter_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a26259b6c7f10eacd37169a51779a24aa9d6a76d8fdef027422bdcbf2557c2f
3
- size 500897101
 
 
 
 
checkpoint-70500/adapter_model/adapter_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a26259b6c7f10eacd37169a51779a24aa9d6a76d8fdef027422bdcbf2557c2f
3
- size 500897101
 
 
 
 
{checkpoint-69000 → checkpoint-76500}/README.md RENAMED
File without changes
{checkpoint-69000 → checkpoint-76500}/adapter_config.json RENAMED
File without changes
{checkpoint-69000 → checkpoint-76500}/adapter_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:16127581d1b65765200af747a5c98d27b237b49430e306dfd23a9c3ad6af3b9c
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3aac0f31027f89872a0b3f8a0a220bec95e100567f0e22fd44826f62a28c01a6
3
  size 500897101
{checkpoint-69000 → checkpoint-76500}/adapter_model/README.md RENAMED
File without changes
{checkpoint-69000 → checkpoint-76500}/adapter_model/adapter_config.json RENAMED
File without changes
{checkpoint-69000 → checkpoint-76500}/adapter_model/adapter_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:16127581d1b65765200af747a5c98d27b237b49430e306dfd23a9c3ad6af3b9c
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3aac0f31027f89872a0b3f8a0a220bec95e100567f0e22fd44826f62a28c01a6
3
  size 500897101
{checkpoint-69500 → checkpoint-76500}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0df421a10c3784a131b0ab37e1485ed063b6fa56024cc56104f9dbaad09ebe1
3
  size 1001724605
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbbb89893d47261fd01a9f3778a4172d980e5d5d899645ffb5c27307d67df6b6
3
  size 1001724605
{checkpoint-69500 → checkpoint-76500}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c9ee221e71303f97217b0d58a1364dcc9e4c1fac4ba0baf829b9e79b7ae1680b
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77cc93baddf8ab32d29af2c1c219f38a87a8af2868d723737c13df696ee1f2ad
3
  size 14575
{checkpoint-70000 → checkpoint-76500}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d978803312071ed04341fcce57866c271d97c1ced7225c7be19f70453e4d9836
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:220829e44432d3fbc0f85c119c5401ba1c2989f858b8188acbaba28a1379d42e
3
  size 627
{checkpoint-70500 → checkpoint-76500}/trainer_state.json RENAMED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.4780386686325073,
3
- "best_model_checkpoint": "./qlora-out/checkpoint-70500",
4
- "epoch": 2.628537340143917,
5
  "eval_steps": 500,
6
- "global_step": 70500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4341,13 +4341,181 @@
4341
  "eval_samples_per_second": 0.42,
4342
  "eval_steps_per_second": 0.42,
4343
  "step": 70500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4344
  }
4345
  ],
4346
  "logging_steps": 500,
4347
  "max_steps": 80463,
4348
  "num_train_epochs": 3,
4349
  "save_steps": 500,
4350
- "total_flos": 1.978819419542102e+19,
4351
  "trial_name": null,
4352
  "trial_params": null
4353
  }
 
1
  {
2
+ "best_metric": 0.4760077893733978,
3
+ "best_model_checkpoint": "./qlora-out/checkpoint-76500",
4
+ "epoch": 2.8522426456880803,
5
  "eval_steps": 500,
6
+ "global_step": 76500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4341
  "eval_samples_per_second": 0.42,
4342
  "eval_steps_per_second": 0.42,
4343
  "step": 70500
4344
+ },
4345
+ {
4346
+ "epoch": 2.65,
4347
+ "learning_rate": 6.749873173827314e-06,
4348
+ "loss": 0.3746,
4349
+ "step": 71000
4350
+ },
4351
+ {
4352
+ "epoch": 2.65,
4353
+ "eval_loss": 0.47773027420043945,
4354
+ "eval_runtime": 1293.7698,
4355
+ "eval_samples_per_second": 0.419,
4356
+ "eval_steps_per_second": 0.419,
4357
+ "step": 71000
4358
+ },
4359
+ {
4360
+ "epoch": 2.67,
4361
+ "learning_rate": 6.0625347721849805e-06,
4362
+ "loss": 0.365,
4363
+ "step": 71500
4364
+ },
4365
+ {
4366
+ "epoch": 2.67,
4367
+ "eval_loss": 0.47759953141212463,
4368
+ "eval_runtime": 1287.2533,
4369
+ "eval_samples_per_second": 0.421,
4370
+ "eval_steps_per_second": 0.421,
4371
+ "step": 71500
4372
+ },
4373
+ {
4374
+ "epoch": 2.68,
4375
+ "learning_rate": 5.411004390662034e-06,
4376
+ "loss": 0.3614,
4377
+ "step": 72000
4378
+ },
4379
+ {
4380
+ "epoch": 2.68,
4381
+ "eval_loss": 0.4774133861064911,
4382
+ "eval_runtime": 1290.2562,
4383
+ "eval_samples_per_second": 0.42,
4384
+ "eval_steps_per_second": 0.42,
4385
+ "step": 72000
4386
+ },
4387
+ {
4388
+ "epoch": 2.7,
4389
+ "learning_rate": 4.795530386109038e-06,
4390
+ "loss": 0.3672,
4391
+ "step": 72500
4392
+ },
4393
+ {
4394
+ "epoch": 2.7,
4395
+ "eval_loss": 0.4771479070186615,
4396
+ "eval_runtime": 1313.3814,
4397
+ "eval_samples_per_second": 0.413,
4398
+ "eval_steps_per_second": 0.413,
4399
+ "step": 72500
4400
+ },
4401
+ {
4402
+ "epoch": 2.72,
4403
+ "learning_rate": 4.2163473710470355e-06,
4404
+ "loss": 0.3536,
4405
+ "step": 73000
4406
+ },
4407
+ {
4408
+ "epoch": 2.72,
4409
+ "eval_loss": 0.4770236909389496,
4410
+ "eval_runtime": 1301.9499,
4411
+ "eval_samples_per_second": 0.416,
4412
+ "eval_steps_per_second": 0.416,
4413
+ "step": 73000
4414
+ },
4415
+ {
4416
+ "epoch": 2.74,
4417
+ "learning_rate": 3.67367612423567e-06,
4418
+ "loss": 0.3693,
4419
+ "step": 73500
4420
+ },
4421
+ {
4422
+ "epoch": 2.74,
4423
+ "eval_loss": 0.4766899645328522,
4424
+ "eval_runtime": 1310.5415,
4425
+ "eval_samples_per_second": 0.414,
4426
+ "eval_steps_per_second": 0.414,
4427
+ "step": 73500
4428
+ },
4429
+ {
4430
+ "epoch": 2.76,
4431
+ "learning_rate": 3.1677235065144862e-06,
4432
+ "loss": 0.358,
4433
+ "step": 74000
4434
+ },
4435
+ {
4436
+ "epoch": 2.76,
4437
+ "eval_loss": 0.47646036744117737,
4438
+ "eval_runtime": 1327.3256,
4439
+ "eval_samples_per_second": 0.408,
4440
+ "eval_steps_per_second": 0.408,
4441
+ "step": 74000
4442
+ },
4443
+ {
4444
+ "epoch": 2.78,
4445
+ "learning_rate": 2.6986823819497353e-06,
4446
+ "loss": 0.3653,
4447
+ "step": 74500
4448
+ },
4449
+ {
4450
+ "epoch": 2.78,
4451
+ "eval_loss": 0.47627386450767517,
4452
+ "eval_runtime": 1332.1149,
4453
+ "eval_samples_per_second": 0.407,
4454
+ "eval_steps_per_second": 0.407,
4455
+ "step": 74500
4456
+ },
4457
+ {
4458
+ "epoch": 2.8,
4459
+ "learning_rate": 2.266731544316425e-06,
4460
+ "loss": 0.3743,
4461
+ "step": 75000
4462
+ },
4463
+ {
4464
+ "epoch": 2.8,
4465
+ "eval_loss": 0.47608959674835205,
4466
+ "eval_runtime": 1305.4101,
4467
+ "eval_samples_per_second": 0.415,
4468
+ "eval_steps_per_second": 0.415,
4469
+ "step": 75000
4470
+ },
4471
+ {
4472
+ "epoch": 2.81,
4473
+ "learning_rate": 1.872035648944026e-06,
4474
+ "loss": 0.3659,
4475
+ "step": 75500
4476
+ },
4477
+ {
4478
+ "epoch": 2.81,
4479
+ "eval_loss": 0.476179838180542,
4480
+ "eval_runtime": 1301.8331,
4481
+ "eval_samples_per_second": 0.416,
4482
+ "eval_steps_per_second": 0.416,
4483
+ "step": 75500
4484
+ },
4485
+ {
4486
+ "epoch": 2.83,
4487
+ "learning_rate": 1.5147451499514353e-06,
4488
+ "loss": 0.3678,
4489
+ "step": 76000
4490
+ },
4491
+ {
4492
+ "epoch": 2.83,
4493
+ "eval_loss": 0.4760454595088959,
4494
+ "eval_runtime": 1297.73,
4495
+ "eval_samples_per_second": 0.418,
4496
+ "eval_steps_per_second": 0.418,
4497
+ "step": 76000
4498
+ },
4499
+ {
4500
+ "epoch": 2.85,
4501
+ "learning_rate": 1.1949962428953965e-06,
4502
+ "loss": 0.3672,
4503
+ "step": 76500
4504
+ },
4505
+ {
4506
+ "epoch": 2.85,
4507
+ "eval_loss": 0.4760077893733978,
4508
+ "eval_runtime": 1293.9854,
4509
+ "eval_samples_per_second": 0.419,
4510
+ "eval_steps_per_second": 0.419,
4511
+ "step": 76500
4512
  }
4513
  ],
4514
  "logging_steps": 500,
4515
  "max_steps": 80463,
4516
  "num_train_epochs": 3,
4517
  "save_steps": 500,
4518
+ "total_flos": 2.14853629526682e+19,
4519
  "trial_name": null,
4520
  "trial_params": null
4521
  }
{checkpoint-69000 → checkpoint-76500}/training_args.bin RENAMED
File without changes
{checkpoint-69500 → checkpoint-77000}/README.md RENAMED
File without changes
{checkpoint-69500 → checkpoint-77000}/adapter_config.json RENAMED
File without changes
{checkpoint-69500 → checkpoint-77000}/adapter_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d536051f2a1ab536e6e716808efa406b8fc4bc641ebcf6102a663de9eab5ffe
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1427c39ed89366176a26d42309badaecc4eaac4173d3df01371a38612540cbce
3
  size 500897101
{checkpoint-69500 → checkpoint-77000}/adapter_model/README.md RENAMED
File without changes
{checkpoint-69500 → checkpoint-77000}/adapter_model/adapter_config.json RENAMED
File without changes
{checkpoint-69500 → checkpoint-77000}/adapter_model/adapter_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d536051f2a1ab536e6e716808efa406b8fc4bc641ebcf6102a663de9eab5ffe
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1427c39ed89366176a26d42309badaecc4eaac4173d3df01371a38612540cbce
3
  size 500897101
{checkpoint-70000 → checkpoint-77000}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3757834dca752ceb36448c74c65b6c698a3cf7eac3b443be1d20520a1ef75c80
3
  size 1001724605
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b09958ee930a5c1cdb447dc5ca98b44b0a8ac3e23351c47128a6daf915aa3809
3
  size 1001724605
{checkpoint-70000 → checkpoint-77000}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e3b4a721a0714cca4311a027981bf55d9c240a69a7f46c912f368eb795c5d17f
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f162395a6f7d2e1af70b53e882440048027967f43d5301d750609c6c591e4ca3
3
  size 14575
{checkpoint-69500 → checkpoint-77000}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bca69d6e74edb4d1fa3e9c45efbdb18d22e7412cb25b7cb947ef97719376c1f2
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35a18202efac039c56b779ce26337552adb710311faff67d76d05cf3142d22af
3
  size 627
{checkpoint-70000 → checkpoint-77000}/trainer_state.json RENAMED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.47838443517684937,
3
- "best_model_checkpoint": "./qlora-out/checkpoint-70000",
4
- "epoch": 2.6098952313485704,
5
  "eval_steps": 500,
6
- "global_step": 70000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4327,13 +4327,209 @@
4327
  "eval_samples_per_second": 0.415,
4328
  "eval_steps_per_second": 0.415,
4329
  "step": 70000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4330
  }
4331
  ],
4332
  "logging_steps": 500,
4333
  "max_steps": 80463,
4334
  "num_train_epochs": 3,
4335
  "save_steps": 500,
4336
- "total_flos": 1.96476655962565e+19,
4337
  "trial_name": null,
4338
  "trial_params": null
4339
  }
 
1
  {
2
+ "best_metric": 0.47582224011421204,
3
+ "best_model_checkpoint": "./qlora-out/checkpoint-77000",
4
+ "epoch": 2.870884754483427,
5
  "eval_steps": 500,
6
+ "global_step": 77000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4327
  "eval_samples_per_second": 0.415,
4328
  "eval_steps_per_second": 0.415,
4329
  "step": 70000
4330
+ },
4331
+ {
4332
+ "epoch": 2.63,
4333
+ "learning_rate": 7.472757589080226e-06,
4334
+ "loss": 0.3614,
4335
+ "step": 70500
4336
+ },
4337
+ {
4338
+ "epoch": 2.63,
4339
+ "eval_loss": 0.4780386686325073,
4340
+ "eval_runtime": 1290.4017,
4341
+ "eval_samples_per_second": 0.42,
4342
+ "eval_steps_per_second": 0.42,
4343
+ "step": 70500
4344
+ },
4345
+ {
4346
+ "epoch": 2.65,
4347
+ "learning_rate": 6.749873173827314e-06,
4348
+ "loss": 0.3746,
4349
+ "step": 71000
4350
+ },
4351
+ {
4352
+ "epoch": 2.65,
4353
+ "eval_loss": 0.47773027420043945,
4354
+ "eval_runtime": 1293.7698,
4355
+ "eval_samples_per_second": 0.419,
4356
+ "eval_steps_per_second": 0.419,
4357
+ "step": 71000
4358
+ },
4359
+ {
4360
+ "epoch": 2.67,
4361
+ "learning_rate": 6.0625347721849805e-06,
4362
+ "loss": 0.365,
4363
+ "step": 71500
4364
+ },
4365
+ {
4366
+ "epoch": 2.67,
4367
+ "eval_loss": 0.47759953141212463,
4368
+ "eval_runtime": 1287.2533,
4369
+ "eval_samples_per_second": 0.421,
4370
+ "eval_steps_per_second": 0.421,
4371
+ "step": 71500
4372
+ },
4373
+ {
4374
+ "epoch": 2.68,
4375
+ "learning_rate": 5.411004390662034e-06,
4376
+ "loss": 0.3614,
4377
+ "step": 72000
4378
+ },
4379
+ {
4380
+ "epoch": 2.68,
4381
+ "eval_loss": 0.4774133861064911,
4382
+ "eval_runtime": 1290.2562,
4383
+ "eval_samples_per_second": 0.42,
4384
+ "eval_steps_per_second": 0.42,
4385
+ "step": 72000
4386
+ },
4387
+ {
4388
+ "epoch": 2.7,
4389
+ "learning_rate": 4.795530386109038e-06,
4390
+ "loss": 0.3672,
4391
+ "step": 72500
4392
+ },
4393
+ {
4394
+ "epoch": 2.7,
4395
+ "eval_loss": 0.4771479070186615,
4396
+ "eval_runtime": 1313.3814,
4397
+ "eval_samples_per_second": 0.413,
4398
+ "eval_steps_per_second": 0.413,
4399
+ "step": 72500
4400
+ },
4401
+ {
4402
+ "epoch": 2.72,
4403
+ "learning_rate": 4.2163473710470355e-06,
4404
+ "loss": 0.3536,
4405
+ "step": 73000
4406
+ },
4407
+ {
4408
+ "epoch": 2.72,
4409
+ "eval_loss": 0.4770236909389496,
4410
+ "eval_runtime": 1301.9499,
4411
+ "eval_samples_per_second": 0.416,
4412
+ "eval_steps_per_second": 0.416,
4413
+ "step": 73000
4414
+ },
4415
+ {
4416
+ "epoch": 2.74,
4417
+ "learning_rate": 3.67367612423567e-06,
4418
+ "loss": 0.3693,
4419
+ "step": 73500
4420
+ },
4421
+ {
4422
+ "epoch": 2.74,
4423
+ "eval_loss": 0.4766899645328522,
4424
+ "eval_runtime": 1310.5415,
4425
+ "eval_samples_per_second": 0.414,
4426
+ "eval_steps_per_second": 0.414,
4427
+ "step": 73500
4428
+ },
4429
+ {
4430
+ "epoch": 2.76,
4431
+ "learning_rate": 3.1677235065144862e-06,
4432
+ "loss": 0.358,
4433
+ "step": 74000
4434
+ },
4435
+ {
4436
+ "epoch": 2.76,
4437
+ "eval_loss": 0.47646036744117737,
4438
+ "eval_runtime": 1327.3256,
4439
+ "eval_samples_per_second": 0.408,
4440
+ "eval_steps_per_second": 0.408,
4441
+ "step": 74000
4442
+ },
4443
+ {
4444
+ "epoch": 2.78,
4445
+ "learning_rate": 2.6986823819497353e-06,
4446
+ "loss": 0.3653,
4447
+ "step": 74500
4448
+ },
4449
+ {
4450
+ "epoch": 2.78,
4451
+ "eval_loss": 0.47627386450767517,
4452
+ "eval_runtime": 1332.1149,
4453
+ "eval_samples_per_second": 0.407,
4454
+ "eval_steps_per_second": 0.407,
4455
+ "step": 74500
4456
+ },
4457
+ {
4458
+ "epoch": 2.8,
4459
+ "learning_rate": 2.266731544316425e-06,
4460
+ "loss": 0.3743,
4461
+ "step": 75000
4462
+ },
4463
+ {
4464
+ "epoch": 2.8,
4465
+ "eval_loss": 0.47608959674835205,
4466
+ "eval_runtime": 1305.4101,
4467
+ "eval_samples_per_second": 0.415,
4468
+ "eval_steps_per_second": 0.415,
4469
+ "step": 75000
4470
+ },
4471
+ {
4472
+ "epoch": 2.81,
4473
+ "learning_rate": 1.872035648944026e-06,
4474
+ "loss": 0.3659,
4475
+ "step": 75500
4476
+ },
4477
+ {
4478
+ "epoch": 2.81,
4479
+ "eval_loss": 0.476179838180542,
4480
+ "eval_runtime": 1301.8331,
4481
+ "eval_samples_per_second": 0.416,
4482
+ "eval_steps_per_second": 0.416,
4483
+ "step": 75500
4484
+ },
4485
+ {
4486
+ "epoch": 2.83,
4487
+ "learning_rate": 1.5147451499514353e-06,
4488
+ "loss": 0.3678,
4489
+ "step": 76000
4490
+ },
4491
+ {
4492
+ "epoch": 2.83,
4493
+ "eval_loss": 0.4760454595088959,
4494
+ "eval_runtime": 1297.73,
4495
+ "eval_samples_per_second": 0.418,
4496
+ "eval_steps_per_second": 0.418,
4497
+ "step": 76000
4498
+ },
4499
+ {
4500
+ "epoch": 2.85,
4501
+ "learning_rate": 1.1949962428953965e-06,
4502
+ "loss": 0.3672,
4503
+ "step": 76500
4504
+ },
4505
+ {
4506
+ "epoch": 2.85,
4507
+ "eval_loss": 0.4760077893733978,
4508
+ "eval_runtime": 1293.9854,
4509
+ "eval_samples_per_second": 0.419,
4510
+ "eval_steps_per_second": 0.419,
4511
+ "step": 76500
4512
+ },
4513
+ {
4514
+ "epoch": 2.87,
4515
+ "learning_rate": 9.129108128541176e-07,
4516
+ "loss": 0.3658,
4517
+ "step": 77000
4518
+ },
4519
+ {
4520
+ "epoch": 2.87,
4521
+ "eval_loss": 0.47582224011421204,
4522
+ "eval_runtime": 1293.6591,
4523
+ "eval_samples_per_second": 0.419,
4524
+ "eval_steps_per_second": 0.419,
4525
+ "step": 77000
4526
  }
4527
  ],
4528
  "logging_steps": 500,
4529
  "max_steps": 80463,
4530
  "num_train_epochs": 3,
4531
  "save_steps": 500,
4532
+ "total_flos": 2.1626487594830807e+19,
4533
  "trial_name": null,
4534
  "trial_params": null
4535
  }
{checkpoint-69500 → checkpoint-77000}/training_args.bin RENAMED
File without changes
{checkpoint-70000 → checkpoint-77500}/README.md RENAMED
File without changes
{checkpoint-70000 → checkpoint-77500}/adapter_config.json RENAMED
File without changes
checkpoint-77500/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a884eb86c6d63307c9acb52dd6a2b1b5697accc52a54a952355462a2d1ff4bb1
3
+ size 500897101
{checkpoint-70000 → checkpoint-77500}/adapter_model/README.md RENAMED
File without changes
{checkpoint-70000 → checkpoint-77500}/adapter_model/adapter_config.json RENAMED
File without changes
checkpoint-77500/adapter_model/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a884eb86c6d63307c9acb52dd6a2b1b5697accc52a54a952355462a2d1ff4bb1
3
+ size 500897101
{checkpoint-69000 → checkpoint-77500}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:52478f59ec5c65d4db6d79009fc0c477e003ba9db2b5648781779b6963bc40cb
3
  size 1001724605
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21a191193d80b976badbd98b5f53b496006b6f2a29af8b6d8dca0b1e0b7ecbe4
3
  size 1001724605
{checkpoint-70500 → checkpoint-77500}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a8693cacc78c05a4720cdf55aa732a0282b2cc8d97e8bde33f65f1b59bbf12e
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:077d679486328cc243c50097dc890c036a89503397938038ef9689bd7097c327
3
  size 14575
{checkpoint-69000 → checkpoint-77500}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f0f5690258b17f07cbd583d2e586e1be27217d957aa1adadeb296ee58f808a87
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea97f91717d4e226e2157501dd3a83bc130311da389f901511bd22351a008c26
3
  size 627
{checkpoint-69500 → checkpoint-77500}/trainer_state.json RENAMED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.47866225242614746,
3
- "best_model_checkpoint": "./qlora-out/checkpoint-69500",
4
- "epoch": 2.591253122553223,
5
  "eval_steps": 500,
6
- "global_step": 69500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4313,13 +4313,237 @@
4313
  "eval_samples_per_second": 0.412,
4314
  "eval_steps_per_second": 0.412,
4315
  "step": 69500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4316
  }
4317
  ],
4318
  "logging_steps": 500,
4319
  "max_steps": 80463,
4320
  "num_train_epochs": 3,
4321
  "save_steps": 500,
4322
- "total_flos": 1.950603151563399e+19,
4323
  "trial_name": null,
4324
  "trial_params": null
4325
  }
 
1
  {
2
+ "best_metric": 0.47582224011421204,
3
+ "best_model_checkpoint": "./qlora-out/checkpoint-77000",
4
+ "epoch": 2.889526863278774,
5
  "eval_steps": 500,
6
+ "global_step": 77500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4313
  "eval_samples_per_second": 0.412,
4314
  "eval_steps_per_second": 0.412,
4315
  "step": 69500
4316
+ },
4317
+ {
4318
+ "epoch": 2.61,
4319
+ "learning_rate": 8.230912461650797e-06,
4320
+ "loss": 0.3601,
4321
+ "step": 70000
4322
+ },
4323
+ {
4324
+ "epoch": 2.61,
4325
+ "eval_loss": 0.47838443517684937,
4326
+ "eval_runtime": 1306.7325,
4327
+ "eval_samples_per_second": 0.415,
4328
+ "eval_steps_per_second": 0.415,
4329
+ "step": 70000
4330
+ },
4331
+ {
4332
+ "epoch": 2.63,
4333
+ "learning_rate": 7.472757589080226e-06,
4334
+ "loss": 0.3614,
4335
+ "step": 70500
4336
+ },
4337
+ {
4338
+ "epoch": 2.63,
4339
+ "eval_loss": 0.4780386686325073,
4340
+ "eval_runtime": 1290.4017,
4341
+ "eval_samples_per_second": 0.42,
4342
+ "eval_steps_per_second": 0.42,
4343
+ "step": 70500
4344
+ },
4345
+ {
4346
+ "epoch": 2.65,
4347
+ "learning_rate": 6.749873173827314e-06,
4348
+ "loss": 0.3746,
4349
+ "step": 71000
4350
+ },
4351
+ {
4352
+ "epoch": 2.65,
4353
+ "eval_loss": 0.47773027420043945,
4354
+ "eval_runtime": 1293.7698,
4355
+ "eval_samples_per_second": 0.419,
4356
+ "eval_steps_per_second": 0.419,
4357
+ "step": 71000
4358
+ },
4359
+ {
4360
+ "epoch": 2.67,
4361
+ "learning_rate": 6.0625347721849805e-06,
4362
+ "loss": 0.365,
4363
+ "step": 71500
4364
+ },
4365
+ {
4366
+ "epoch": 2.67,
4367
+ "eval_loss": 0.47759953141212463,
4368
+ "eval_runtime": 1287.2533,
4369
+ "eval_samples_per_second": 0.421,
4370
+ "eval_steps_per_second": 0.421,
4371
+ "step": 71500
4372
+ },
4373
+ {
4374
+ "epoch": 2.68,
4375
+ "learning_rate": 5.411004390662034e-06,
4376
+ "loss": 0.3614,
4377
+ "step": 72000
4378
+ },
4379
+ {
4380
+ "epoch": 2.68,
4381
+ "eval_loss": 0.4774133861064911,
4382
+ "eval_runtime": 1290.2562,
4383
+ "eval_samples_per_second": 0.42,
4384
+ "eval_steps_per_second": 0.42,
4385
+ "step": 72000
4386
+ },
4387
+ {
4388
+ "epoch": 2.7,
4389
+ "learning_rate": 4.795530386109038e-06,
4390
+ "loss": 0.3672,
4391
+ "step": 72500
4392
+ },
4393
+ {
4394
+ "epoch": 2.7,
4395
+ "eval_loss": 0.4771479070186615,
4396
+ "eval_runtime": 1313.3814,
4397
+ "eval_samples_per_second": 0.413,
4398
+ "eval_steps_per_second": 0.413,
4399
+ "step": 72500
4400
+ },
4401
+ {
4402
+ "epoch": 2.72,
4403
+ "learning_rate": 4.2163473710470355e-06,
4404
+ "loss": 0.3536,
4405
+ "step": 73000
4406
+ },
4407
+ {
4408
+ "epoch": 2.72,
4409
+ "eval_loss": 0.4770236909389496,
4410
+ "eval_runtime": 1301.9499,
4411
+ "eval_samples_per_second": 0.416,
4412
+ "eval_steps_per_second": 0.416,
4413
+ "step": 73000
4414
+ },
4415
+ {
4416
+ "epoch": 2.74,
4417
+ "learning_rate": 3.67367612423567e-06,
4418
+ "loss": 0.3693,
4419
+ "step": 73500
4420
+ },
4421
+ {
4422
+ "epoch": 2.74,
4423
+ "eval_loss": 0.4766899645328522,
4424
+ "eval_runtime": 1310.5415,
4425
+ "eval_samples_per_second": 0.414,
4426
+ "eval_steps_per_second": 0.414,
4427
+ "step": 73500
4428
+ },
4429
+ {
4430
+ "epoch": 2.76,
4431
+ "learning_rate": 3.1677235065144862e-06,
4432
+ "loss": 0.358,
4433
+ "step": 74000
4434
+ },
4435
+ {
4436
+ "epoch": 2.76,
4437
+ "eval_loss": 0.47646036744117737,
4438
+ "eval_runtime": 1327.3256,
4439
+ "eval_samples_per_second": 0.408,
4440
+ "eval_steps_per_second": 0.408,
4441
+ "step": 74000
4442
+ },
4443
+ {
4444
+ "epoch": 2.78,
4445
+ "learning_rate": 2.6986823819497353e-06,
4446
+ "loss": 0.3653,
4447
+ "step": 74500
4448
+ },
4449
+ {
4450
+ "epoch": 2.78,
4451
+ "eval_loss": 0.47627386450767517,
4452
+ "eval_runtime": 1332.1149,
4453
+ "eval_samples_per_second": 0.407,
4454
+ "eval_steps_per_second": 0.407,
4455
+ "step": 74500
4456
+ },
4457
+ {
4458
+ "epoch": 2.8,
4459
+ "learning_rate": 2.266731544316425e-06,
4460
+ "loss": 0.3743,
4461
+ "step": 75000
4462
+ },
4463
+ {
4464
+ "epoch": 2.8,
4465
+ "eval_loss": 0.47608959674835205,
4466
+ "eval_runtime": 1305.4101,
4467
+ "eval_samples_per_second": 0.415,
4468
+ "eval_steps_per_second": 0.415,
4469
+ "step": 75000
4470
+ },
4471
+ {
4472
+ "epoch": 2.81,
4473
+ "learning_rate": 1.872035648944026e-06,
4474
+ "loss": 0.3659,
4475
+ "step": 75500
4476
+ },
4477
+ {
4478
+ "epoch": 2.81,
4479
+ "eval_loss": 0.476179838180542,
4480
+ "eval_runtime": 1301.8331,
4481
+ "eval_samples_per_second": 0.416,
4482
+ "eval_steps_per_second": 0.416,
4483
+ "step": 75500
4484
+ },
4485
+ {
4486
+ "epoch": 2.83,
4487
+ "learning_rate": 1.5147451499514353e-06,
4488
+ "loss": 0.3678,
4489
+ "step": 76000
4490
+ },
4491
+ {
4492
+ "epoch": 2.83,
4493
+ "eval_loss": 0.4760454595088959,
4494
+ "eval_runtime": 1297.73,
4495
+ "eval_samples_per_second": 0.418,
4496
+ "eval_steps_per_second": 0.418,
4497
+ "step": 76000
4498
+ },
4499
+ {
4500
+ "epoch": 2.85,
4501
+ "learning_rate": 1.1949962428953965e-06,
4502
+ "loss": 0.3672,
4503
+ "step": 76500
4504
+ },
4505
+ {
4506
+ "epoch": 2.85,
4507
+ "eval_loss": 0.4760077893733978,
4508
+ "eval_runtime": 1293.9854,
4509
+ "eval_samples_per_second": 0.419,
4510
+ "eval_steps_per_second": 0.419,
4511
+ "step": 76500
4512
+ },
4513
+ {
4514
+ "epoch": 2.87,
4515
+ "learning_rate": 9.129108128541176e-07,
4516
+ "loss": 0.3658,
4517
+ "step": 77000
4518
+ },
4519
+ {
4520
+ "epoch": 2.87,
4521
+ "eval_loss": 0.47582224011421204,
4522
+ "eval_runtime": 1293.6591,
4523
+ "eval_samples_per_second": 0.419,
4524
+ "eval_steps_per_second": 0.419,
4525
+ "step": 77000
4526
+ },
4527
+ {
4528
+ "epoch": 2.89,
4529
+ "learning_rate": 6.685963879659362e-07,
4530
+ "loss": 0.3675,
4531
+ "step": 77500
4532
+ },
4533
+ {
4534
+ "epoch": 2.89,
4535
+ "eval_loss": 0.4758478105068207,
4536
+ "eval_runtime": 1311.0096,
4537
+ "eval_samples_per_second": 0.413,
4538
+ "eval_steps_per_second": 0.413,
4539
+ "step": 77500
4540
  }
4541
  ],
4542
  "logging_steps": 500,
4543
  "max_steps": 80463,
4544
  "num_train_epochs": 3,
4545
  "save_steps": 500,
4546
+ "total_flos": 2.1768185355260805e+19,
4547
  "trial_name": null,
4548
  "trial_params": null
4549
  }
{checkpoint-70000 → checkpoint-77500}/training_args.bin RENAMED
File without changes
{checkpoint-70500 → checkpoint-78000}/README.md RENAMED
File without changes
{checkpoint-70500 → checkpoint-78000}/adapter_config.json RENAMED
File without changes
checkpoint-78000/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a95e49326f95cbf72dea87596b1fd0bcc1abcccd06310612c0ac6bdb3314bc1
3
+ size 500897101
{checkpoint-70500 → checkpoint-78000}/adapter_model/README.md RENAMED
File without changes
{checkpoint-70500 → checkpoint-78000}/adapter_model/adapter_config.json RENAMED
File without changes
checkpoint-78000/adapter_model/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a95e49326f95cbf72dea87596b1fd0bcc1abcccd06310612c0ac6bdb3314bc1
3
+ size 500897101
{checkpoint-70500 → checkpoint-78000}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e892af212f732e9530af52f246a0ac8cce7e5fdd232039bb0e4c90cdd7fa3e74
3
  size 1001724605
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad0ade3b6ce4741aa4976bb9b0aae8b16a0605bea6020968a870961a9dc6cf7f
3
  size 1001724605
{checkpoint-69000 β†’ checkpoint-78000}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b7732edd0ae5999edb700e14bae64e828df5241beb83fbee05815f6c10b73570
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cae952b189b908268d157bce31712cd2487d9ad50a53ef0ae319c8f965d6c13c
3
  size 14575
{checkpoint-70500 β†’ checkpoint-78000}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d97c294b2bc2150ad9018c4136e33bcd18ab0fac2dca93dc8eff3b34e709e5be
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:495a1512040b6a087956ab3f68fd532032c899c1dae932d93c4b1ba403b50d1d
3
  size 627
{checkpoint-69000 β†’ checkpoint-78000}/trainer_state.json RENAMED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.4789520502090454,
3
- "best_model_checkpoint": "./qlora-out/checkpoint-69000",
4
- "epoch": 2.5726110137578764,
5
  "eval_steps": 500,
6
- "global_step": 69000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4299,13 +4299,265 @@
4299
  "eval_samples_per_second": 0.403,
4300
  "eval_steps_per_second": 0.403,
4301
  "step": 69000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4302
  }
4303
  ],
4304
  "logging_steps": 500,
4305
  "max_steps": 80463,
4306
  "num_train_epochs": 3,
4307
  "save_steps": 500,
4308
- "total_flos": 1.9364073941589443e+19,
4309
  "trial_name": null,
4310
  "trial_params": null
4311
  }
 
1
  {
2
+ "best_metric": 0.4757947325706482,
3
+ "best_model_checkpoint": "./qlora-out/checkpoint-78000",
4
+ "epoch": 2.908168972074121,
5
  "eval_steps": 500,
6
+ "global_step": 78000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4299
  "eval_samples_per_second": 0.403,
4300
  "eval_steps_per_second": 0.403,
4301
  "step": 69000
4302
+ },
4303
+ {
4304
+ "epoch": 2.59,
4305
+ "learning_rate": 9.024048790501272e-06,
4306
+ "loss": 0.3594,
4307
+ "step": 69500
4308
+ },
4309
+ {
4310
+ "epoch": 2.59,
4311
+ "eval_loss": 0.47866225242614746,
4312
+ "eval_runtime": 1316.9883,
4313
+ "eval_samples_per_second": 0.412,
4314
+ "eval_steps_per_second": 0.412,
4315
+ "step": 69500
4316
+ },
4317
+ {
4318
+ "epoch": 2.61,
4319
+ "learning_rate": 8.230912461650797e-06,
4320
+ "loss": 0.3601,
4321
+ "step": 70000
4322
+ },
4323
+ {
4324
+ "epoch": 2.61,
4325
+ "eval_loss": 0.47838443517684937,
4326
+ "eval_runtime": 1306.7325,
4327
+ "eval_samples_per_second": 0.415,
4328
+ "eval_steps_per_second": 0.415,
4329
+ "step": 70000
4330
+ },
4331
+ {
4332
+ "epoch": 2.63,
4333
+ "learning_rate": 7.472757589080226e-06,
4334
+ "loss": 0.3614,
4335
+ "step": 70500
4336
+ },
4337
+ {
4338
+ "epoch": 2.63,
4339
+ "eval_loss": 0.4780386686325073,
4340
+ "eval_runtime": 1290.4017,
4341
+ "eval_samples_per_second": 0.42,
4342
+ "eval_steps_per_second": 0.42,
4343
+ "step": 70500
4344
+ },
4345
+ {
4346
+ "epoch": 2.65,
4347
+ "learning_rate": 6.749873173827314e-06,
4348
+ "loss": 0.3746,
4349
+ "step": 71000
4350
+ },
4351
+ {
4352
+ "epoch": 2.65,
4353
+ "eval_loss": 0.47773027420043945,
4354
+ "eval_runtime": 1293.7698,
4355
+ "eval_samples_per_second": 0.419,
4356
+ "eval_steps_per_second": 0.419,
4357
+ "step": 71000
4358
+ },
4359
+ {
4360
+ "epoch": 2.67,
4361
+ "learning_rate": 6.0625347721849805e-06,
4362
+ "loss": 0.365,
4363
+ "step": 71500
4364
+ },
4365
+ {
4366
+ "epoch": 2.67,
4367
+ "eval_loss": 0.47759953141212463,
4368
+ "eval_runtime": 1287.2533,
4369
+ "eval_samples_per_second": 0.421,
4370
+ "eval_steps_per_second": 0.421,
4371
+ "step": 71500
4372
+ },
4373
+ {
4374
+ "epoch": 2.68,
4375
+ "learning_rate": 5.411004390662034e-06,
4376
+ "loss": 0.3614,
4377
+ "step": 72000
4378
+ },
4379
+ {
4380
+ "epoch": 2.68,
4381
+ "eval_loss": 0.4774133861064911,
4382
+ "eval_runtime": 1290.2562,
4383
+ "eval_samples_per_second": 0.42,
4384
+ "eval_steps_per_second": 0.42,
4385
+ "step": 72000
4386
+ },
4387
+ {
4388
+ "epoch": 2.7,
4389
+ "learning_rate": 4.795530386109038e-06,
4390
+ "loss": 0.3672,
4391
+ "step": 72500
4392
+ },
4393
+ {
4394
+ "epoch": 2.7,
4395
+ "eval_loss": 0.4771479070186615,
4396
+ "eval_runtime": 1313.3814,
4397
+ "eval_samples_per_second": 0.413,
4398
+ "eval_steps_per_second": 0.413,
4399
+ "step": 72500
4400
+ },
4401
+ {
4402
+ "epoch": 2.72,
4403
+ "learning_rate": 4.2163473710470355e-06,
4404
+ "loss": 0.3536,
4405
+ "step": 73000
4406
+ },
4407
+ {
4408
+ "epoch": 2.72,
4409
+ "eval_loss": 0.4770236909389496,
4410
+ "eval_runtime": 1301.9499,
4411
+ "eval_samples_per_second": 0.416,
4412
+ "eval_steps_per_second": 0.416,
4413
+ "step": 73000
4414
+ },
4415
+ {
4416
+ "epoch": 2.74,
4417
+ "learning_rate": 3.67367612423567e-06,
4418
+ "loss": 0.3693,
4419
+ "step": 73500
4420
+ },
4421
+ {
4422
+ "epoch": 2.74,
4423
+ "eval_loss": 0.4766899645328522,
4424
+ "eval_runtime": 1310.5415,
4425
+ "eval_samples_per_second": 0.414,
4426
+ "eval_steps_per_second": 0.414,
4427
+ "step": 73500
4428
+ },
4429
+ {
4430
+ "epoch": 2.76,
4431
+ "learning_rate": 3.1677235065144862e-06,
4432
+ "loss": 0.358,
4433
+ "step": 74000
4434
+ },
4435
+ {
4436
+ "epoch": 2.76,
4437
+ "eval_loss": 0.47646036744117737,
4438
+ "eval_runtime": 1327.3256,
4439
+ "eval_samples_per_second": 0.408,
4440
+ "eval_steps_per_second": 0.408,
4441
+ "step": 74000
4442
+ },
4443
+ {
4444
+ "epoch": 2.78,
4445
+ "learning_rate": 2.6986823819497353e-06,
4446
+ "loss": 0.3653,
4447
+ "step": 74500
4448
+ },
4449
+ {
4450
+ "epoch": 2.78,
4451
+ "eval_loss": 0.47627386450767517,
4452
+ "eval_runtime": 1332.1149,
4453
+ "eval_samples_per_second": 0.407,
4454
+ "eval_steps_per_second": 0.407,
4455
+ "step": 74500
4456
+ },
4457
+ {
4458
+ "epoch": 2.8,
4459
+ "learning_rate": 2.266731544316425e-06,
4460
+ "loss": 0.3743,
4461
+ "step": 75000
4462
+ },
4463
+ {
4464
+ "epoch": 2.8,
4465
+ "eval_loss": 0.47608959674835205,
4466
+ "eval_runtime": 1305.4101,
4467
+ "eval_samples_per_second": 0.415,
4468
+ "eval_steps_per_second": 0.415,
4469
+ "step": 75000
4470
+ },
4471
+ {
4472
+ "epoch": 2.81,
4473
+ "learning_rate": 1.872035648944026e-06,
4474
+ "loss": 0.3659,
4475
+ "step": 75500
4476
+ },
4477
+ {
4478
+ "epoch": 2.81,
4479
+ "eval_loss": 0.476179838180542,
4480
+ "eval_runtime": 1301.8331,
4481
+ "eval_samples_per_second": 0.416,
4482
+ "eval_steps_per_second": 0.416,
4483
+ "step": 75500
4484
+ },
4485
+ {
4486
+ "epoch": 2.83,
4487
+ "learning_rate": 1.5147451499514353e-06,
4488
+ "loss": 0.3678,
4489
+ "step": 76000
4490
+ },
4491
+ {
4492
+ "epoch": 2.83,
4493
+ "eval_loss": 0.4760454595088959,
4494
+ "eval_runtime": 1297.73,
4495
+ "eval_samples_per_second": 0.418,
4496
+ "eval_steps_per_second": 0.418,
4497
+ "step": 76000
4498
+ },
4499
+ {
4500
+ "epoch": 2.85,
4501
+ "learning_rate": 1.1949962428953965e-06,
4502
+ "loss": 0.3672,
4503
+ "step": 76500
4504
+ },
4505
+ {
4506
+ "epoch": 2.85,
4507
+ "eval_loss": 0.4760077893733978,
4508
+ "eval_runtime": 1293.9854,
4509
+ "eval_samples_per_second": 0.419,
4510
+ "eval_steps_per_second": 0.419,
4511
+ "step": 76500
4512
+ },
4513
+ {
4514
+ "epoch": 2.87,
4515
+ "learning_rate": 9.129108128541176e-07,
4516
+ "loss": 0.3658,
4517
+ "step": 77000
4518
+ },
4519
+ {
4520
+ "epoch": 2.87,
4521
+ "eval_loss": 0.47582224011421204,
4522
+ "eval_runtime": 1293.6591,
4523
+ "eval_samples_per_second": 0.419,
4524
+ "eval_steps_per_second": 0.419,
4525
+ "step": 77000
4526
+ },
4527
+ {
4528
+ "epoch": 2.89,
4529
+ "learning_rate": 6.685963879659362e-07,
4530
+ "loss": 0.3675,
4531
+ "step": 77500
4532
+ },
4533
+ {
4534
+ "epoch": 2.89,
4535
+ "eval_loss": 0.4758478105068207,
4536
+ "eval_runtime": 1311.0096,
4537
+ "eval_samples_per_second": 0.413,
4538
+ "eval_steps_per_second": 0.413,
4539
+ "step": 77500
4540
+ },
4541
+ {
4542
+ "epoch": 2.91,
4543
+ "learning_rate": 4.6214609844061894e-07,
4544
+ "loss": 0.3696,
4545
+ "step": 78000
4546
+ },
4547
+ {
4548
+ "epoch": 2.91,
4549
+ "eval_loss": 0.4757947325706482,
4550
+ "eval_runtime": 1268.9631,
4551
+ "eval_samples_per_second": 0.427,
4552
+ "eval_steps_per_second": 0.427,
4553
+ "step": 78000
4554
  }
4555
  ],
4556
  "logging_steps": 500,
4557
  "max_steps": 80463,
4558
  "num_train_epochs": 3,
4559
  "save_steps": 500,
4560
+ "total_flos": 2.1910553027265577e+19,
4561
  "trial_name": null,
4562
  "trial_params": null
4563
  }
{checkpoint-70500 β†’ checkpoint-78000}/training_args.bin RENAMED
File without changes