fats-fme committed on
Commit
feb87cf
·
verified ·
1 Parent(s): 8169e25

Training in progress, step 30, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0356e2d45181d9bfc59eef8268a1682cedbf6a4fe1d1962dc8fc4b60fc0de8a6
3
  size 90365754
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f801add4c0d2e08fe2b55e70f53bea6aef21a176c0657a3e820c530d88b29ca
3
  size 90365754
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be1f464f5148cadad2eb6ce0c98da4db9cbea74da6558df564b870bb2d05b79a
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72a607a89f29f579bb7e3b03d01051406f784e1383419ed5b710137e9d12493e
3
  size 14512
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4076be35501e6f142268aafb53d7dd402a55fe1fab08306a4a1b945caeef0c2b
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ec16933bc3b62330056ce3e53e0d229a8971f7e3969b0b72440ffdd637e6216
3
  size 14512
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:88de868c96c0a10245aeff16f0aa4d46e50213f783416c2c0d07e645991cb38e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a73e1ff9beffc13aa54f4adf4df9ed4ad8819cc503c53ddfd100ef74e91d520
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.25477707006369427,
5
  "eval_steps": 15,
6
- "global_step": 15,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -128,6 +128,119 @@
128
  "eval_samples_per_second": 23.719,
129
  "eval_steps_per_second": 5.93,
130
  "step": 15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  }
132
  ],
133
  "logging_steps": 1,
@@ -147,7 +260,7 @@
147
  "attributes": {}
148
  }
149
  },
150
- "total_flos": 5806335417384960.0,
151
  "train_batch_size": 2,
152
  "trial_name": null,
153
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.5095541401273885,
5
  "eval_steps": 15,
6
+ "global_step": 30,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
128
  "eval_samples_per_second": 23.719,
129
  "eval_steps_per_second": 5.93,
130
  "step": 15
131
+ },
132
+ {
133
+ "epoch": 0.27176220806794055,
134
+ "grad_norm": NaN,
135
+ "learning_rate": 3.2000000000000005e-05,
136
+ "loss": 0.0,
137
+ "step": 16
138
+ },
139
+ {
140
+ "epoch": 0.28874734607218683,
141
+ "grad_norm": NaN,
142
+ "learning_rate": 3.4000000000000007e-05,
143
+ "loss": 0.0,
144
+ "step": 17
145
+ },
146
+ {
147
+ "epoch": 0.3057324840764331,
148
+ "grad_norm": NaN,
149
+ "learning_rate": 3.6e-05,
150
+ "loss": 0.0,
151
+ "step": 18
152
+ },
153
+ {
154
+ "epoch": 0.3227176220806794,
155
+ "grad_norm": NaN,
156
+ "learning_rate": 3.8e-05,
157
+ "loss": 0.0,
158
+ "step": 19
159
+ },
160
+ {
161
+ "epoch": 0.33970276008492567,
162
+ "grad_norm": NaN,
163
+ "learning_rate": 4e-05,
164
+ "loss": 0.0,
165
+ "step": 20
166
+ },
167
+ {
168
+ "epoch": 0.35668789808917195,
169
+ "grad_norm": NaN,
170
+ "learning_rate": 4.2e-05,
171
+ "loss": 0.0,
172
+ "step": 21
173
+ },
174
+ {
175
+ "epoch": 0.37367303609341823,
176
+ "grad_norm": NaN,
177
+ "learning_rate": 4.4000000000000006e-05,
178
+ "loss": 0.0,
179
+ "step": 22
180
+ },
181
+ {
182
+ "epoch": 0.39065817409766457,
183
+ "grad_norm": NaN,
184
+ "learning_rate": 4.600000000000001e-05,
185
+ "loss": 0.0,
186
+ "step": 23
187
+ },
188
+ {
189
+ "epoch": 0.40764331210191085,
190
+ "grad_norm": NaN,
191
+ "learning_rate": 4.8e-05,
192
+ "loss": 0.0,
193
+ "step": 24
194
+ },
195
+ {
196
+ "epoch": 0.42462845010615713,
197
+ "grad_norm": NaN,
198
+ "learning_rate": 5e-05,
199
+ "loss": 0.0,
200
+ "step": 25
201
+ },
202
+ {
203
+ "epoch": 0.4416135881104034,
204
+ "grad_norm": NaN,
205
+ "learning_rate": 5.2000000000000004e-05,
206
+ "loss": 0.0,
207
+ "step": 26
208
+ },
209
+ {
210
+ "epoch": 0.4585987261146497,
211
+ "grad_norm": NaN,
212
+ "learning_rate": 5.4000000000000005e-05,
213
+ "loss": 0.0,
214
+ "step": 27
215
+ },
216
+ {
217
+ "epoch": 0.47558386411889597,
218
+ "grad_norm": NaN,
219
+ "learning_rate": 5.6000000000000006e-05,
220
+ "loss": 0.0,
221
+ "step": 28
222
+ },
223
+ {
224
+ "epoch": 0.49256900212314225,
225
+ "grad_norm": NaN,
226
+ "learning_rate": 5.8e-05,
227
+ "loss": 0.0,
228
+ "step": 29
229
+ },
230
+ {
231
+ "epoch": 0.5095541401273885,
232
+ "grad_norm": NaN,
233
+ "learning_rate": 6e-05,
234
+ "loss": 0.0,
235
+ "step": 30
236
+ },
237
+ {
238
+ "epoch": 0.5095541401273885,
239
+ "eval_loss": NaN,
240
+ "eval_runtime": 4.2146,
241
+ "eval_samples_per_second": 23.727,
242
+ "eval_steps_per_second": 5.932,
243
+ "step": 30
244
  }
245
  ],
246
  "logging_steps": 1,
 
260
  "attributes": {}
261
  }
262
  },
263
+ "total_flos": 1.161267083476992e+16,
264
  "train_batch_size": 2,
265
  "trial_name": null,
266
  "trial_params": null