dimasik87 committed on
Commit 79559ca
1 Parent(s): 0cf3d7d

Training in progress, step 71, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7fb37c7f1390fda4f25085cb4d2bdd978d0822a3651e127ead75cc2efae5c52f
+ oid sha256:789f732f6ae158363be51dcdbef6f8f96975791a165ff7c3b3464144afe7f3ca
  size 90207248
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ad17691905a0a147fcdaffd2f03ac91b04280e4cef606a4311393b76f6b18fc9
+ oid sha256:b18bdf4ee3f5f194463526ff846ab8b4a350a31a46ea6ff09f036fa51901b049
  size 180543866
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:98283eb7aa86ea8a9a39a588a7edf0a1aac84a6c5eeea894980663b69e80fc0e
+ oid sha256:4d8ccd21da8722feb50ad01684192384ccf18d632b1f82a8d2d3bc2e16119623
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:aec03bf87f279f0836b9767993a70ad750b638d565662d40d6a3ff55df0f361f
+ oid sha256:3e683effdbf1801ca21c52b8334cbe1684c23fef6e33fad16cce510e4700ea65
  size 1064
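
The four files above are stored via Git LFS, so the commit only rewrites each pointer's oid, which is the SHA-256 digest of the underlying blob; the sizes are unchanged because the tensors keep their shapes. A minimal sketch for checking a locally downloaded file against its pointer (the local path is an assumption, not part of this commit):

```python
import hashlib

def lfs_sha256(path: str, chunk_size: int = 1 << 20) -> str:
    """Compute the SHA-256 digest that a Git LFS pointer records as its oid."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# Hypothetical local copy; compare against the new oid from the diff above.
expected = "789f732f6ae158363be51dcdbef6f8f96975791a165ff7c3b3464144afe7f3ca"
actual = lfs_sha256("last-checkpoint/adapter_model.safetensors")
print("adapter_model.safetensors OK" if actual == expected else "checksum mismatch")
```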
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 2.127659574468085,
+ "epoch": 3.021276595744681,
  "eval_steps": 25,
- "global_step": 50,
+ "global_step": 71,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -381,6 +381,153 @@
  "eval_samples_per_second": 29.014,
  "eval_steps_per_second": 14.507,
  "step": 50
+ },
+ {
+ "epoch": 2.1702127659574466,
+ "grad_norm": 4.252004623413086,
+ "learning_rate": 1.9868268181037185e-05,
+ "loss": 2.5753,
+ "step": 51
+ },
+ {
+ "epoch": 2.2127659574468086,
+ "grad_norm": 4.096494197845459,
+ "learning_rate": 1.8057659717401947e-05,
+ "loss": 2.25,
+ "step": 52
+ },
+ {
+ "epoch": 2.25531914893617,
+ "grad_norm": 2.2319579124450684,
+ "learning_rate": 1.631521781767214e-05,
+ "loss": 2.3128,
+ "step": 53
+ },
+ {
+ "epoch": 2.297872340425532,
+ "grad_norm": 2.539842367172241,
+ "learning_rate": 1.4644660940672627e-05,
+ "loss": 2.2327,
+ "step": 54
+ },
+ {
+ "epoch": 2.3404255319148937,
+ "grad_norm": 3.147387981414795,
+ "learning_rate": 1.3049554138967051e-05,
+ "loss": 2.1849,
+ "step": 55
+ },
+ {
+ "epoch": 2.382978723404255,
+ "grad_norm": 3.685150146484375,
+ "learning_rate": 1.1533301450856054e-05,
+ "loss": 2.4277,
+ "step": 56
+ },
+ {
+ "epoch": 2.425531914893617,
+ "grad_norm": 4.715639591217041,
+ "learning_rate": 1.0099138635988026e-05,
+ "loss": 2.5731,
+ "step": 57
+ },
+ {
+ "epoch": 2.4680851063829787,
+ "grad_norm": 2.3089892864227295,
+ "learning_rate": 8.75012627008489e-06,
+ "loss": 2.4525,
+ "step": 58
+ },
+ {
+ "epoch": 2.5106382978723403,
+ "grad_norm": 2.630906105041504,
+ "learning_rate": 7.489143213519301e-06,
+ "loss": 2.1467,
+ "step": 59
+ },
+ {
+ "epoch": 2.5531914893617023,
+ "grad_norm": 3.5077598094940186,
+ "learning_rate": 6.318880467681526e-06,
+ "loss": 2.4428,
+ "step": 60
+ },
+ {
+ "epoch": 2.595744680851064,
+ "grad_norm": 3.928353786468506,
+ "learning_rate": 5.241835432246889e-06,
+ "loss": 2.2675,
+ "step": 61
+ },
+ {
+ "epoch": 2.6382978723404253,
+ "grad_norm": 4.511261940002441,
+ "learning_rate": 4.260306575598949e-06,
+ "loss": 2.2792,
+ "step": 62
+ },
+ {
+ "epoch": 2.6808510638297873,
+ "grad_norm": 2.4075710773468018,
+ "learning_rate": 3.376388529782215e-06,
+ "loss": 2.3584,
+ "step": 63
+ },
+ {
+ "epoch": 2.723404255319149,
+ "grad_norm": 2.83492112159729,
+ "learning_rate": 2.591967620451707e-06,
+ "loss": 1.9989,
+ "step": 64
+ },
+ {
+ "epoch": 2.7659574468085104,
+ "grad_norm": 3.5723989009857178,
+ "learning_rate": 1.908717841359048e-06,
+ "loss": 2.2559,
+ "step": 65
+ },
+ {
+ "epoch": 2.8085106382978724,
+ "grad_norm": 3.7944796085357666,
+ "learning_rate": 1.328097281965357e-06,
+ "loss": 2.2629,
+ "step": 66
+ },
+ {
+ "epoch": 2.851063829787234,
+ "grad_norm": 4.501901626586914,
+ "learning_rate": 8.513450158049108e-07,
+ "loss": 2.3413,
+ "step": 67
+ },
+ {
+ "epoch": 2.8936170212765955,
+ "grad_norm": 2.496269464492798,
+ "learning_rate": 4.794784562397458e-07,
+ "loss": 2.2531,
+ "step": 68
+ },
+ {
+ "epoch": 2.9361702127659575,
+ "grad_norm": 2.9496850967407227,
+ "learning_rate": 2.1329118524827662e-07,
+ "loss": 2.1338,
+ "step": 69
+ },
+ {
+ "epoch": 2.978723404255319,
+ "grad_norm": 4.252143383026123,
+ "learning_rate": 5.3351259881379014e-08,
+ "loss": 2.4166,
+ "step": 70
+ },
+ {
+ "epoch": 3.021276595744681,
+ "grad_norm": 7.078902721405029,
+ "learning_rate": 0.0,
+ "loss": 4.2177,
+ "step": 71
  }
  ],
  "logging_steps": 1,
@@ -395,12 +542,12 @@
  "should_evaluate": false,
  "should_log": false,
  "should_save": true,
- "should_training_stop": false
+ "should_training_stop": true
  },
  "attributes": {}
  }
  },
- "total_flos": 9788035851878400.0,
+ "total_flos": 1.3899010909667328e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null