Nexspear commited on
Commit
08e596e
·
verified ·
1 Parent(s): 84f630e

Training in progress, step 126, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b6fb9b08c2e10e053f3d81ad8d2e0e24099462e2b5a2472f9154ee3481f0255
3
  size 100966336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30d12559217c7f23d41501f47dca546b26251a84fd89ee55f980fc13aa93f12b
3
  size 100966336
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:760dba386ee4560fcbe21cb431d3d8086e73cb0d13b223b37fde2ea0eec443fa
3
  size 51613348
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2fa88fe8784ca0c128965c9ad8a5a8e29ef38b586f101c90b586476e3ed4558
3
  size 51613348
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d87bc4656c379289d41186281c9cbad1876e328c641d66e6bd6c9c084caea09
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac024c06a7fafa2800668c036cb57c23ab71073198d0a72d5ca171de9c65f87b
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:200db5d515f7fe0d1c4e0bac456fe6b9042173a9f2b890836ad8c5bd6dfb4d2e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d6789ad2e7141c857db1a1b89c580a274f669224bfc6f48fb1745262d15c182
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.012583798359612,
5
  "eval_steps": 42,
6
- "global_step": 84,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -227,6 +227,112 @@
227
  "eval_samples_per_second": 35.94,
228
  "eval_steps_per_second": 4.495,
229
  "step": 84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
  }
231
  ],
232
  "logging_steps": 3,
@@ -246,7 +352,7 @@
246
  "attributes": {}
247
  }
248
  },
249
- "total_flos": 2.765900651220173e+16,
250
  "train_batch_size": 8,
251
  "trial_name": null,
252
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.018875697539418,
5
  "eval_steps": 42,
6
+ "global_step": 126,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
227
  "eval_samples_per_second": 35.94,
228
  "eval_steps_per_second": 4.495,
229
  "step": 84
230
+ },
231
+ {
232
+ "epoch": 0.013033219729598143,
233
+ "grad_norm": 0.12993893027305603,
234
+ "learning_rate": 4.701488829641845e-05,
235
+ "loss": 1.1313,
236
+ "step": 87
237
+ },
238
+ {
239
+ "epoch": 0.013482641099584285,
240
+ "grad_norm": 0.18705669045448303,
241
+ "learning_rate": 4.678296760308474e-05,
242
+ "loss": 1.2752,
243
+ "step": 90
244
+ },
245
+ {
246
+ "epoch": 0.013932062469570428,
247
+ "grad_norm": 0.14135660231113434,
248
+ "learning_rate": 4.6542988418269876e-05,
249
+ "loss": 1.1628,
250
+ "step": 93
251
+ },
252
+ {
253
+ "epoch": 0.01438148383955657,
254
+ "grad_norm": 0.11707276850938797,
255
+ "learning_rate": 4.629503952098011e-05,
256
+ "loss": 1.0801,
257
+ "step": 96
258
+ },
259
+ {
260
+ "epoch": 0.014830905209542713,
261
+ "grad_norm": 0.14167702198028564,
262
+ "learning_rate": 4.6039212638573833e-05,
263
+ "loss": 1.0452,
264
+ "step": 99
265
+ },
266
+ {
267
+ "epoch": 0.015280326579528856,
268
+ "grad_norm": 0.15983764827251434,
269
+ "learning_rate": 4.5775602412827604e-05,
270
+ "loss": 1.1844,
271
+ "step": 102
272
+ },
273
+ {
274
+ "epoch": 0.015729747949515,
275
+ "grad_norm": 0.12750358879566193,
276
+ "learning_rate": 4.55043063649239e-05,
277
+ "loss": 1.0877,
278
+ "step": 105
279
+ },
280
+ {
281
+ "epoch": 0.01617916931950114,
282
+ "grad_norm": 0.1487520933151245,
283
+ "learning_rate": 4.522542485937369e-05,
284
+ "loss": 1.1373,
285
+ "step": 108
286
+ },
287
+ {
288
+ "epoch": 0.016628590689487285,
289
+ "grad_norm": 0.16089919209480286,
290
+ "learning_rate": 4.493906106688712e-05,
291
+ "loss": 1.1806,
292
+ "step": 111
293
+ },
294
+ {
295
+ "epoch": 0.017078012059473426,
296
+ "grad_norm": 0.158865287899971,
297
+ "learning_rate": 4.4645320926206064e-05,
298
+ "loss": 1.0689,
299
+ "step": 114
300
+ },
301
+ {
302
+ "epoch": 0.01752743342945957,
303
+ "grad_norm": 0.16464003920555115,
304
+ "learning_rate": 4.434431310491267e-05,
305
+ "loss": 1.1319,
306
+ "step": 117
307
+ },
308
+ {
309
+ "epoch": 0.017976854799445715,
310
+ "grad_norm": 0.12468679249286652,
311
+ "learning_rate": 4.4036148959228365e-05,
312
+ "loss": 1.0237,
313
+ "step": 120
314
+ },
315
+ {
316
+ "epoch": 0.018426276169431856,
317
+ "grad_norm": 0.14475342631340027,
318
+ "learning_rate": 4.372094249281821e-05,
319
+ "loss": 1.1352,
320
+ "step": 123
321
+ },
322
+ {
323
+ "epoch": 0.018875697539418,
324
+ "grad_norm": 0.1386341154575348,
325
+ "learning_rate": 4.3398810314615876e-05,
326
+ "loss": 1.177,
327
+ "step": 126
328
+ },
329
+ {
330
+ "epoch": 0.018875697539418,
331
+ "eval_loss": 1.1068843603134155,
332
+ "eval_runtime": 312.5635,
333
+ "eval_samples_per_second": 35.97,
334
+ "eval_steps_per_second": 4.498,
335
+ "step": 126
336
  }
337
  ],
338
  "logging_steps": 3,
 
352
  "attributes": {}
353
  }
354
  },
355
+ "total_flos": 4.03686064914432e+16,
356
  "train_batch_size": 8,
357
  "trial_name": null,
358
  "trial_params": null