leixa committed (verified)
Commit b54337b · 1 Parent(s): e111ec0

Training in progress, step 74, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cc30cece14f478e2af613381fbb617ac45fe1974313654d9fb3b322c6fce4b1e
+oid sha256:2917a15aa5ba9682ed53879d4460ede976f2dd3146789e83b893657ff753d790
 size 100697728
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:544049c4d232dccd5811f61f406354afbeb3ee63151095cf624b3182f2ab0ce4
+oid sha256:22d1a817857097bae254f7f119849eb180e26964af3ab4f080a5cbfc9303ab97
 size 51418196
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:af0503fd7fbb1b552a733848d5c4fe83e7d6de89c61f8c3678e5a0147d63b66f
+oid sha256:198e42c83d53a6bf92ca7f7c6034bd499cdf81a23127d9a063c1e0a8bdc7b93d
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f7d249aebe1c6718148a5212a9c894a1a659d316ff657a708e08cb3775b5bc11
+oid sha256:7105a509bdfde59e6e4d893f7ae8de5118e3beb17226743671d553509dc9662b
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
 "best_metric": null,
 "best_model_checkpoint": null,
-"epoch": 2.877551020408163,
+"epoch": 3.0510204081632653,
 "eval_steps": 7,
-"global_step": 70,
+"global_step": 74,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
@@ -256,6 +256,13 @@
 "eval_samples_per_second": 22.366,
 "eval_steps_per_second": 3.195,
 "step": 70
+},
+{
+"epoch": 2.9591836734693877,
+"grad_norm": 1.8807049989700317,
+"learning_rate": 2.407636663901591e-07,
+"loss": 3.7945,
+"step": 72
 }
 ],
 "logging_steps": 3,
@@ -270,12 +277,12 @@
 "should_evaluate": false,
 "should_log": false,
 "should_save": true,
-"should_training_stop": false
+"should_training_stop": true
 },
 "attributes": {}
 }
 },
-"total_flos": 5.157856223428608e+16,
+"total_flos": 5.452590864767386e+16,
 "train_batch_size": 8,
 "trial_name": null,
 "trial_params": null