MohamedAhmedAE committed on
Commit
d435078
1 Parent(s): 8473296

Training in progress, step 9800, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0e10b04549b2c2e70165ffc1d8ae22026b88e479d86fb505c085edccdd341dc3
3
  size 1423793692
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9d3097201c581d2fc1e0ac63741fdb2046404d91572a71ec0a2d1df566229b9
3
  size 1423793692
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87c564637674bc4ef708b32c27681f7d62f778d13e969fff2a1586db5695deba
3
  size 2847809392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95ea64d249ea2c1d4b60471ad44dd1911880e972b512c7a1a2c13af619a6f036
3
  size 2847809392
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:769069b70f19d1c4a5c8a816b7666b1d3c5714eb9ad7485b3c720d03b9793953
3
  size 13990
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b67ac0dc56e87afd828d12fa63e2974563d42a9d56163f968e03e8ed24c8b3d
3
  size 13990
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad03c43584b30749232c412b92b2efd3cecbc7fe0311c3b1cbddcf5c0550b7e3
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4063efcf27ef2e21873156771b92a5297bf15faaf7ac9fcdd7d3704dc05c483
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.16177991260549052,
5
  "eval_steps": 200,
6
- "global_step": 9700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -686,6 +686,13 @@
686
  "learning_rate": 1.9999483906005676e-05,
687
  "loss": 7.1899,
688
  "step": 9700
 
 
 
 
 
 
 
689
  }
690
  ],
691
  "logging_steps": 100,
@@ -705,7 +712,7 @@
705
  "attributes": {}
706
  }
707
  },
708
- "total_flos": 483151756878756.0,
709
  "train_batch_size": 1,
710
  "trial_name": null,
711
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.16344774675606258,
5
  "eval_steps": 200,
6
+ "global_step": 9800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
686
  "learning_rate": 1.9999483906005676e-05,
687
  "loss": 7.1899,
688
  "step": 9700
689
+ },
690
+ {
691
+ "epoch": 0.16344774675606258,
692
+ "grad_norm": 34.184627532958984,
693
+ "learning_rate": 1.9999473204590722e-05,
694
+ "loss": 7.0871,
695
+ "step": 9800
696
  }
697
  ],
698
  "logging_steps": 100,
 
712
  "attributes": {}
713
  }
714
  },
715
+ "total_flos": 487830390535296.0,
716
  "train_batch_size": 1,
717
  "trial_name": null,
718
  "trial_params": null