leixa commited on
Commit
151476d
·
verified ·
1 Parent(s): 4181d10

Training in progress, step 70, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a0843bbbf1dd7ed15465600c90ad8c9b0c28468a0844e4ebda602129469175d
3
  size 100697728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc30cece14f478e2af613381fbb617ac45fe1974313654d9fb3b322c6fce4b1e
3
  size 100697728
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c151ab447f416018bacd1d65d6437793b061686c57ce2dfe2820df036bb0abc
3
  size 51418196
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:544049c4d232dccd5811f61f406354afbeb3ee63151095cf624b3182f2ab0ce4
3
  size 51418196
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:14af3b7497866f6cf5a5613385292140fe523d104121d66858d29b7a1164be40
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af0503fd7fbb1b552a733848d5c4fe83e7d6de89c61f8c3678e5a0147d63b66f
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed9a099a9106e0a8d502a4be3b996cb1e9e8205fddd096c1b15024b60866a331
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7d249aebe1c6718148a5212a9c894a1a659d316ff657a708e08cb3775b5bc11
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.5918367346938775,
5
  "eval_steps": 7,
6
- "global_step": 63,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -234,6 +234,28 @@
234
  "eval_samples_per_second": 22.316,
235
  "eval_steps_per_second": 3.188,
236
  "step": 63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
  }
238
  ],
239
  "logging_steps": 3,
@@ -253,7 +275,7 @@
253
  "attributes": {}
254
  }
255
  },
256
- "total_flos": 4.642070601085747e+16,
257
  "train_batch_size": 8,
258
  "trial_name": null,
259
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.877551020408163,
5
  "eval_steps": 7,
6
+ "global_step": 70,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
234
  "eval_samples_per_second": 22.316,
235
  "eval_steps_per_second": 3.188,
236
  "step": 63
237
+ },
238
+ {
239
+ "epoch": 2.7142857142857144,
240
+ "grad_norm": 1.9079219102859497,
241
+ "learning_rate": 3.8060233744356633e-06,
242
+ "loss": 4.2447,
243
+ "step": 66
244
+ },
245
+ {
246
+ "epoch": 2.836734693877551,
247
+ "grad_norm": 2.1283318996429443,
248
+ "learning_rate": 1.4984373402728014e-06,
249
+ "loss": 3.9816,
250
+ "step": 69
251
+ },
252
+ {
253
+ "epoch": 2.877551020408163,
254
+ "eval_loss": 1.0567643642425537,
255
+ "eval_runtime": 1.8778,
256
+ "eval_samples_per_second": 22.366,
257
+ "eval_steps_per_second": 3.195,
258
+ "step": 70
259
  }
260
  ],
261
  "logging_steps": 3,
 
275
  "attributes": {}
276
  }
277
  },
278
+ "total_flos": 5.157856223428608e+16,
279
  "train_batch_size": 8,
280
  "trial_name": null,
281
  "trial_params": null