Femboyuwu2000 commited on
Commit
a719f79
1 Parent(s): 4cc74a6

Training in progress, step 6840, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:720b003068dffa8262fc19d67635315f97ac650b263fd177d92adc68b5e84fe8
3
  size 13982248
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:518c66ebd8ad215b6e10e1e9f964e45894e80619dac9fca6659b4f3ddeb3b3fd
3
  size 13982248
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:370ab4cbab2424ee9196b0433c81cdf1630c4c010b17101af78d131d7e230efe
3
  size 7062522
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c06d2f34d5afc5a2972610b94f622e8929acb7496b3920789e233305f50c0f6f
3
  size 7062522
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:225da166a3567479b467edbd66cc5540e5fb9e21ad5559249bc60343e8aa02cb
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78b522addaf0562d6804ca14f05b7612546f36db18c9447cb63a2360b49b82fb
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e6c398f06d76c43e8804ec85a57336d4caa4a253171be4f5a124c97886f31616
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06b9e247c644d88c6b75288b062ce133bd318a7b3c27e9c864410974153f4cd6
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.5424,
5
  "eval_steps": 500,
6
- "global_step": 6780,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2380,6 +2380,27 @@
2380
  "learning_rate": 2.3094745108254437e-05,
2381
  "loss": 3.5111,
2382
  "step": 6780
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2383
  }
2384
  ],
2385
  "logging_steps": 20,
@@ -2387,7 +2408,7 @@
2387
  "num_input_tokens_seen": 0,
2388
  "num_train_epochs": 2,
2389
  "save_steps": 20,
2390
- "total_flos": 1.6009810231689216e+16,
2391
  "train_batch_size": 8,
2392
  "trial_name": null,
2393
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.5472,
5
  "eval_steps": 500,
6
+ "global_step": 6840,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2380
  "learning_rate": 2.3094745108254437e-05,
2381
  "loss": 3.5111,
2382
  "step": 6780
2383
+ },
2384
+ {
2385
+ "epoch": 0.54,
2386
+ "grad_norm": 26.23602867126465,
2387
+ "learning_rate": 2.305380260473476e-05,
2388
+ "loss": 3.4637,
2389
+ "step": 6800
2390
+ },
2391
+ {
2392
+ "epoch": 0.55,
2393
+ "grad_norm": 32.71681213378906,
2394
+ "learning_rate": 2.3012775620636747e-05,
2395
+ "loss": 3.4752,
2396
+ "step": 6820
2397
+ },
2398
+ {
2399
+ "epoch": 0.55,
2400
+ "grad_norm": 55.637813568115234,
2401
+ "learning_rate": 2.2971664586314055e-05,
2402
+ "loss": 3.531,
2403
+ "step": 6840
2404
  }
2405
  ],
2406
  "logging_steps": 20,
 
2408
  "num_input_tokens_seen": 0,
2409
  "num_train_epochs": 2,
2410
  "save_steps": 20,
2411
+ "total_flos": 1.6145823545229312e+16,
2412
  "train_batch_size": 8,
2413
  "trial_name": null,
2414
  "trial_params": null