MohamedAhmedAE committed on
Commit
1f00134
1 Parent(s): 73f58b7

Training in progress, step 1500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b533ba61614af35ac91cced1f614248e8154ed0dd46572e8c699448f56030c76
3
  size 1423793692
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fafbc4b695fd8bfdea32039508732f28aad09b914411f7b745968d96db5a527d
3
  size 1423793692
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee647db17a124f48c017f66c388e1fc846268a7b9f1858edd0dabf5b598223d3
3
  size 2847809392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91ee2b5f2695e677f3f7d19bd6c7ee32347c3d70682d9581e9db11b0868ae4b3
3
  size 2847809392
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:98a084e546fd6d851f7063fec6f082b51f0aceba0b14196f963fddc59794a486
3
  size 13990
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f0307c22c9eaef5bdd0acdfa0f36120192eebe56339a218faa4b8fc8466c182
3
  size 13990
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:00dfa93d58e416a9ddd98713a7f0497854ec0cf4ca81b8b7df525d8239e1008f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:492c71fdfddaa74e5ff08b634deba26aa598bdfa223039762d4d422cf8dc7688
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.02334967810800894,
5
  "eval_steps": 200,
6
- "global_step": 1400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -105,6 +105,13 @@
105
  "learning_rate": 1.999998931473612e-05,
106
  "loss": 7.6066,
107
  "step": 1400
 
 
 
 
 
 
 
108
  }
109
  ],
110
  "logging_steps": 100,
@@ -124,7 +131,7 @@
124
  "attributes": {}
125
  }
126
  },
127
- "total_flos": 67825624179654.0,
128
  "train_batch_size": 1,
129
  "trial_name": null,
130
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.025017512258581006,
5
  "eval_steps": 200,
6
+ "global_step": 1500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
105
  "learning_rate": 1.999998931473612e-05,
106
  "loss": 7.6066,
107
  "step": 1400
108
+ },
109
+ {
110
+ "epoch": 0.025017512258581006,
111
+ "grad_norm": 14.43734073638916,
112
+ "learning_rate": 1.9999987727890814e-05,
113
+ "loss": 7.6378,
114
+ "step": 1500
115
  }
116
  ],
117
  "logging_steps": 100,
 
131
  "attributes": {}
132
  }
133
  },
134
+ "total_flos": 72658998638064.0,
135
  "train_batch_size": 1,
136
  "trial_name": null,
137
  "trial_params": null