fedovtt committed on
Commit
4ba26f4
1 Parent(s): 937e3f0

Training in progress, step 50, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6dc16025718ab1cb4777bcb1aed244af010678663b1fca7fc513df159c35e6ab
3
  size 159967880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88c02662bc21fd5ce451b0e879ebbc1acc30bf97d842bc4a1696230233d98068
3
  size 159967880
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f1aa39e856be778685752f25761e3f9bd3e178c9bff03d7aaeadf451da2a594
3
  size 320194002
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77a4d35b335c67f000c032e1e7e1356ec5a1f7e69f977817962a63dddf11c9ac
3
  size 320194002
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f561a82bea27cf9bdf3a7a427edf3fc8517d927c408784a51f4efc3089462a9
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14fe42ed1c83d00f96dc49bc31a9b32fbb3ae79e41e2f931dcb67ac68061c4e3
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:040b95482c646935022d148ebc91f462fbf2195cfa3365adbbe3bd6ca1f35a74
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e69e2b49ea642509f0c688c16fb190b7cf27dac0a18903a5e2d1467d0343d8b8
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.8583690987124464,
5
  "eval_steps": 4,
6
- "global_step": 25,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -238,6 +238,229 @@
238
  "learning_rate": 0.000138268343236509,
239
  "loss": 3.4839,
240
  "step": 25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
241
  }
242
  ],
243
  "logging_steps": 1,
@@ -252,12 +475,12 @@
252
  "should_evaluate": false,
253
  "should_log": false,
254
  "should_save": true,
255
- "should_training_stop": false
256
  },
257
  "attributes": {}
258
  }
259
  },
260
- "total_flos": 1.306916403806208e+17,
261
  "train_batch_size": 4,
262
  "trial_name": null,
263
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.7167381974248928,
5
  "eval_steps": 4,
6
+ "global_step": 50,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
238
  "learning_rate": 0.000138268343236509,
239
  "loss": 3.4839,
240
  "step": 25
241
+ },
242
+ {
243
+ "epoch": 0.8927038626609443,
244
+ "grad_norm": 5.970047473907471,
245
+ "learning_rate": 0.00013090169943749476,
246
+ "loss": 3.2252,
247
+ "step": 26
248
+ },
249
+ {
250
+ "epoch": 0.927038626609442,
251
+ "grad_norm": 6.384853839874268,
252
+ "learning_rate": 0.00012334453638559057,
253
+ "loss": 3.4018,
254
+ "step": 27
255
+ },
256
+ {
257
+ "epoch": 0.9613733905579399,
258
+ "grad_norm": 6.741840839385986,
259
+ "learning_rate": 0.0001156434465040231,
260
+ "loss": 3.4371,
261
+ "step": 28
262
+ },
263
+ {
264
+ "epoch": 0.9613733905579399,
265
+ "eval_loss": 0.4020407199859619,
266
+ "eval_runtime": 8.0743,
267
+ "eval_samples_per_second": 6.069,
268
+ "eval_steps_per_second": 1.61,
269
+ "step": 28
270
+ },
271
+ {
272
+ "epoch": 0.9957081545064378,
273
+ "grad_norm": 5.92246675491333,
274
+ "learning_rate": 0.0001078459095727845,
275
+ "loss": 2.4705,
276
+ "step": 29
277
+ },
278
+ {
279
+ "epoch": 1.0300429184549356,
280
+ "grad_norm": 6.4705681800842285,
281
+ "learning_rate": 0.0001,
282
+ "loss": 2.5391,
283
+ "step": 30
284
+ },
285
+ {
286
+ "epoch": 1.0643776824034334,
287
+ "grad_norm": 7.236270427703857,
288
+ "learning_rate": 9.215409042721552e-05,
289
+ "loss": 3.4479,
290
+ "step": 31
291
+ },
292
+ {
293
+ "epoch": 1.0987124463519313,
294
+ "grad_norm": 5.484566688537598,
295
+ "learning_rate": 8.435655349597689e-05,
296
+ "loss": 2.1396,
297
+ "step": 32
298
+ },
299
+ {
300
+ "epoch": 1.0987124463519313,
301
+ "eval_loss": 0.3587982654571533,
302
+ "eval_runtime": 8.0769,
303
+ "eval_samples_per_second": 6.067,
304
+ "eval_steps_per_second": 1.61,
305
+ "step": 32
306
+ },
307
+ {
308
+ "epoch": 1.1330472103004292,
309
+ "grad_norm": 6.789762020111084,
310
+ "learning_rate": 7.66554636144095e-05,
311
+ "loss": 2.4319,
312
+ "step": 33
313
+ },
314
+ {
315
+ "epoch": 1.167381974248927,
316
+ "grad_norm": 5.139922142028809,
317
+ "learning_rate": 6.909830056250527e-05,
318
+ "loss": 2.5539,
319
+ "step": 34
320
+ },
321
+ {
322
+ "epoch": 1.201716738197425,
323
+ "grad_norm": 5.432690143585205,
324
+ "learning_rate": 6.173165676349103e-05,
325
+ "loss": 1.9336,
326
+ "step": 35
327
+ },
328
+ {
329
+ "epoch": 1.2360515021459229,
330
+ "grad_norm": 4.705223560333252,
331
+ "learning_rate": 5.4600950026045326e-05,
332
+ "loss": 1.6073,
333
+ "step": 36
334
+ },
335
+ {
336
+ "epoch": 1.2360515021459229,
337
+ "eval_loss": 0.3393373191356659,
338
+ "eval_runtime": 8.0747,
339
+ "eval_samples_per_second": 6.068,
340
+ "eval_steps_per_second": 1.61,
341
+ "step": 36
342
+ },
343
+ {
344
+ "epoch": 1.2703862660944205,
345
+ "grad_norm": 8.144052505493164,
346
+ "learning_rate": 4.7750143528405126e-05,
347
+ "loss": 2.4886,
348
+ "step": 37
349
+ },
350
+ {
351
+ "epoch": 1.3047210300429184,
352
+ "grad_norm": 5.885042667388916,
353
+ "learning_rate": 4.12214747707527e-05,
354
+ "loss": 2.1173,
355
+ "step": 38
356
+ },
357
+ {
358
+ "epoch": 1.3390557939914163,
359
+ "grad_norm": 5.44212532043457,
360
+ "learning_rate": 3.5055195166981645e-05,
361
+ "loss": 1.5208,
362
+ "step": 39
363
+ },
364
+ {
365
+ "epoch": 1.3733905579399142,
366
+ "grad_norm": 8.440479278564453,
367
+ "learning_rate": 2.9289321881345254e-05,
368
+ "loss": 3.2822,
369
+ "step": 40
370
+ },
371
+ {
372
+ "epoch": 1.3733905579399142,
373
+ "eval_loss": 0.3231399357318878,
374
+ "eval_runtime": 8.0758,
375
+ "eval_samples_per_second": 6.068,
376
+ "eval_steps_per_second": 1.61,
377
+ "step": 40
378
+ },
379
+ {
380
+ "epoch": 1.407725321888412,
381
+ "grad_norm": 6.7061991691589355,
382
+ "learning_rate": 2.3959403439996907e-05,
383
+ "loss": 2.4418,
384
+ "step": 41
385
+ },
386
+ {
387
+ "epoch": 1.44206008583691,
388
+ "grad_norm": 5.881109714508057,
389
+ "learning_rate": 1.9098300562505266e-05,
390
+ "loss": 2.252,
391
+ "step": 42
392
+ },
393
+ {
394
+ "epoch": 1.4763948497854078,
395
+ "grad_norm": 5.9437479972839355,
396
+ "learning_rate": 1.4735983564590783e-05,
397
+ "loss": 1.8867,
398
+ "step": 43
399
+ },
400
+ {
401
+ "epoch": 1.5107296137339055,
402
+ "grad_norm": 5.992803573608398,
403
+ "learning_rate": 1.0899347581163221e-05,
404
+ "loss": 1.7314,
405
+ "step": 44
406
+ },
407
+ {
408
+ "epoch": 1.5107296137339055,
409
+ "eval_loss": 0.31934094429016113,
410
+ "eval_runtime": 8.0874,
411
+ "eval_samples_per_second": 6.059,
412
+ "eval_steps_per_second": 1.607,
413
+ "step": 44
414
+ },
415
+ {
416
+ "epoch": 1.5450643776824036,
417
+ "grad_norm": 6.900696754455566,
418
+ "learning_rate": 7.612046748871327e-06,
419
+ "loss": 2.8172,
420
+ "step": 45
421
+ },
422
+ {
423
+ "epoch": 1.5793991416309012,
424
+ "grad_norm": 4.783636569976807,
425
+ "learning_rate": 4.8943483704846475e-06,
426
+ "loss": 1.6444,
427
+ "step": 46
428
+ },
429
+ {
430
+ "epoch": 1.613733905579399,
431
+ "grad_norm": 7.204476356506348,
432
+ "learning_rate": 2.7630079602323442e-06,
433
+ "loss": 2.3282,
434
+ "step": 47
435
+ },
436
+ {
437
+ "epoch": 1.648068669527897,
438
+ "grad_norm": 5.65119743347168,
439
+ "learning_rate": 1.231165940486234e-06,
440
+ "loss": 1.7266,
441
+ "step": 48
442
+ },
443
+ {
444
+ "epoch": 1.648068669527897,
445
+ "eval_loss": 0.3161769509315491,
446
+ "eval_runtime": 8.1044,
447
+ "eval_samples_per_second": 6.046,
448
+ "eval_steps_per_second": 1.604,
449
+ "step": 48
450
+ },
451
+ {
452
+ "epoch": 1.6824034334763949,
453
+ "grad_norm": 5.6475605964660645,
454
+ "learning_rate": 3.0826662668720364e-07,
455
+ "loss": 1.6549,
456
+ "step": 49
457
+ },
458
+ {
459
+ "epoch": 1.7167381974248928,
460
+ "grad_norm": 9.571765899658203,
461
+ "learning_rate": 0.0,
462
+ "loss": 3.696,
463
+ "step": 50
464
  }
465
  ],
466
  "logging_steps": 1,
 
475
  "should_evaluate": false,
476
  "should_log": false,
477
  "should_save": true,
478
+ "should_training_stop": true
479
  },
480
  "attributes": {}
481
  }
482
  },
483
+ "total_flos": 2.6089318710981427e+17,
484
  "train_batch_size": 4,
485
  "trial_name": null,
486
  "trial_params": null