lesso11 commited on
Commit
d9cd933
·
verified ·
1 Parent(s): 6cde69e

Training in progress, step 50, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8af0d6b1086a49aef2d31d23ab5780a24495378ae0872c9c98ff57e609c344b2
3
  size 335922386
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6d42efacb6ebae8205c8b5fa635169a9fda0316599b000306d2e790e5ebab7c
3
  size 335922386
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b5cce30f5051c71508ef13842ca510544ad5db32f727ed0aacc39148ef96b628
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8dccd2209ce0a00fd7d294f9f7e44828a38fbb2c754d6bb490a588c320684b5f
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2780c7a8ea8ca5a008997203cab6c9a49a49740b423ff45de4d7032e0ce20792
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d65e9c686bf7672c60c2e716324e2ec3b9e362aa67293892146275e003a9f911
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9615384615384616,
5
  "eval_steps": 7,
6
- "global_step": 25,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -214,6 +214,213 @@
214
  "learning_rate": 8.81342589055191e-05,
215
  "loss": 0.0,
216
  "step": 25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
217
  }
218
  ],
219
  "logging_steps": 1,
@@ -233,7 +440,7 @@
233
  "attributes": {}
234
  }
235
  },
236
- "total_flos": 3.70943641780224e+16,
237
  "train_batch_size": 8,
238
  "trial_name": null,
239
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.9230769230769231,
5
  "eval_steps": 7,
6
+ "global_step": 50,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
214
  "learning_rate": 8.81342589055191e-05,
215
  "loss": 0.0,
216
  "step": 25
217
+ },
218
+ {
219
+ "epoch": 1.0,
220
+ "grad_norm": NaN,
221
+ "learning_rate": 8.657656676318346e-05,
222
+ "loss": 0.0,
223
+ "step": 26
224
+ },
225
+ {
226
+ "epoch": 1.0384615384615385,
227
+ "grad_norm": NaN,
228
+ "learning_rate": 8.493847138894209e-05,
229
+ "loss": 0.0,
230
+ "step": 27
231
+ },
232
+ {
233
+ "epoch": 1.0769230769230769,
234
+ "grad_norm": NaN,
235
+ "learning_rate": 8.322357367194109e-05,
236
+ "loss": 0.0,
237
+ "step": 28
238
+ },
239
+ {
240
+ "epoch": 1.0769230769230769,
241
+ "eval_loss": NaN,
242
+ "eval_runtime": 4.8674,
243
+ "eval_samples_per_second": 4.52,
244
+ "eval_steps_per_second": 0.616,
245
+ "step": 28
246
+ },
247
+ {
248
+ "epoch": 1.1153846153846154,
249
+ "grad_norm": NaN,
250
+ "learning_rate": 8.143564332954425e-05,
251
+ "loss": 0.0,
252
+ "step": 29
253
+ },
254
+ {
255
+ "epoch": 1.1538461538461537,
256
+ "grad_norm": NaN,
257
+ "learning_rate": 7.957861062067614e-05,
258
+ "loss": 0.0,
259
+ "step": 30
260
+ },
261
+ {
262
+ "epoch": 1.1923076923076923,
263
+ "grad_norm": NaN,
264
+ "learning_rate": 7.765655770625997e-05,
265
+ "loss": 0.0,
266
+ "step": 31
267
+ },
268
+ {
269
+ "epoch": 1.2307692307692308,
270
+ "grad_norm": NaN,
271
+ "learning_rate": 7.56737096757421e-05,
272
+ "loss": 0.0,
273
+ "step": 32
274
+ },
275
+ {
276
+ "epoch": 1.2692307692307692,
277
+ "grad_norm": NaN,
278
+ "learning_rate": 7.363442525942826e-05,
279
+ "loss": 0.0,
280
+ "step": 33
281
+ },
282
+ {
283
+ "epoch": 1.3076923076923077,
284
+ "grad_norm": NaN,
285
+ "learning_rate": 7.154318724704853e-05,
286
+ "loss": 0.0,
287
+ "step": 34
288
+ },
289
+ {
290
+ "epoch": 1.3461538461538463,
291
+ "grad_norm": NaN,
292
+ "learning_rate": 6.940459263361249e-05,
293
+ "loss": 0.0,
294
+ "step": 35
295
+ },
296
+ {
297
+ "epoch": 1.3461538461538463,
298
+ "eval_loss": NaN,
299
+ "eval_runtime": 4.8653,
300
+ "eval_samples_per_second": 4.522,
301
+ "eval_steps_per_second": 0.617,
302
+ "step": 35
303
+ },
304
+ {
305
+ "epoch": 1.3846153846153846,
306
+ "grad_norm": NaN,
307
+ "learning_rate": 6.722334251421665e-05,
308
+ "loss": 0.0,
309
+ "step": 36
310
+ },
311
+ {
312
+ "epoch": 1.4230769230769231,
313
+ "grad_norm": NaN,
314
+ "learning_rate": 6.500423175001705e-05,
315
+ "loss": 0.0,
316
+ "step": 37
317
+ },
318
+ {
319
+ "epoch": 1.4615384615384617,
320
+ "grad_norm": NaN,
321
+ "learning_rate": 6.275213842808383e-05,
322
+ "loss": 0.0,
323
+ "step": 38
324
+ },
325
+ {
326
+ "epoch": 1.5,
327
+ "grad_norm": NaN,
328
+ "learning_rate": 6.0472013138307235e-05,
329
+ "loss": 0.0,
330
+ "step": 39
331
+ },
332
+ {
333
+ "epoch": 1.5384615384615383,
334
+ "grad_norm": NaN,
335
+ "learning_rate": 5.816886809092651e-05,
336
+ "loss": 0.0,
337
+ "step": 40
338
+ },
339
+ {
340
+ "epoch": 1.5769230769230769,
341
+ "grad_norm": NaN,
342
+ "learning_rate": 5.584776609860414e-05,
343
+ "loss": 0.0,
344
+ "step": 41
345
+ },
346
+ {
347
+ "epoch": 1.6153846153846154,
348
+ "grad_norm": NaN,
349
+ "learning_rate": 5.351380944726465e-05,
350
+ "loss": 0.0,
351
+ "step": 42
352
+ },
353
+ {
354
+ "epoch": 1.6153846153846154,
355
+ "eval_loss": NaN,
356
+ "eval_runtime": 4.8664,
357
+ "eval_samples_per_second": 4.521,
358
+ "eval_steps_per_second": 0.616,
359
+ "step": 42
360
+ },
361
+ {
362
+ "epoch": 1.6538461538461537,
363
+ "grad_norm": NaN,
364
+ "learning_rate": 5.117212868016303e-05,
365
+ "loss": 0.0,
366
+ "step": 43
367
+ },
368
+ {
369
+ "epoch": 1.6923076923076923,
370
+ "grad_norm": NaN,
371
+ "learning_rate": 4.882787131983698e-05,
372
+ "loss": 0.0,
373
+ "step": 44
374
+ },
375
+ {
376
+ "epoch": 1.7307692307692308,
377
+ "grad_norm": NaN,
378
+ "learning_rate": 4.648619055273537e-05,
379
+ "loss": 0.0,
380
+ "step": 45
381
+ },
382
+ {
383
+ "epoch": 1.7692307692307692,
384
+ "grad_norm": NaN,
385
+ "learning_rate": 4.415223390139588e-05,
386
+ "loss": 0.0,
387
+ "step": 46
388
+ },
389
+ {
390
+ "epoch": 1.8076923076923077,
391
+ "grad_norm": NaN,
392
+ "learning_rate": 4.183113190907349e-05,
393
+ "loss": 0.0,
394
+ "step": 47
395
+ },
396
+ {
397
+ "epoch": 1.8461538461538463,
398
+ "grad_norm": NaN,
399
+ "learning_rate": 3.952798686169279e-05,
400
+ "loss": 0.0,
401
+ "step": 48
402
+ },
403
+ {
404
+ "epoch": 1.8846153846153846,
405
+ "grad_norm": NaN,
406
+ "learning_rate": 3.7247861571916185e-05,
407
+ "loss": 0.0,
408
+ "step": 49
409
+ },
410
+ {
411
+ "epoch": 1.8846153846153846,
412
+ "eval_loss": NaN,
413
+ "eval_runtime": 4.8653,
414
+ "eval_samples_per_second": 4.522,
415
+ "eval_steps_per_second": 0.617,
416
+ "step": 49
417
+ },
418
+ {
419
+ "epoch": 1.9230769230769231,
420
+ "grad_norm": NaN,
421
+ "learning_rate": 3.499576824998298e-05,
422
+ "loss": 0.0,
423
+ "step": 50
424
  }
425
  ],
426
  "logging_steps": 1,
 
440
  "attributes": {}
441
  }
442
  },
443
+ "total_flos": 7.363231289337446e+16,
444
  "train_batch_size": 8,
445
  "trial_name": null,
446
  "trial_params": null