Seosnaps committed on
Commit
cf8af4a
1 Parent(s): 9fba84f

Training in progress, step 1000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dff8e9648a181521563cd2c06b216c20674c53129b91f4e30fe648660ed514d6
3
  size 966995080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1a3787cc1d2a3960f00ca0c5b66c6dd4ff41d7324bd299aadcac7459ea8c099
3
  size 966995080
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cd08307cf03ac19299af59cb7ad10e1c265d9a417ae1d181bddb3541af024831
3
  size 1925070764
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a95a4e858f09457edb963cf83b8981e6338abbab0762c9a560a5d5242aedb72
3
  size 1925070764
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7036c9e83c0a40e3eae7c28323a1ccbf94a9756c65c84f64364a471049656fc9
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d30912ac1337a75f3290dd2f00c6220273cdcb51a36b541da912118f267733f2
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a40ecfa3f3fd838108dc1603326181f7cc7fa9cd66a24dab20698df0575be28
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04fe46db27f239a414db1d5d90722d80220853d3e644018ca60e784cd72b6710
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 88.17781184134891,
3
- "best_model_checkpoint": "./whisper-small-ha-adam-v4/checkpoint-500",
4
- "epoch": 3.1847133757961785,
5
  "eval_steps": 500,
6
- "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -157,6 +157,156 @@
157
  "eval_wer": 88.17781184134891,
158
  "eval_wer_ortho": 90.4296875,
159
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
  }
161
  ],
162
  "logging_steps": 25,
@@ -176,7 +326,7 @@
176
  "attributes": {}
177
  }
178
  },
179
- "total_flos": 2.30522017775616e+18,
180
  "train_batch_size": 16,
181
  "trial_name": null,
182
  "trial_params": null
 
1
  {
2
+ "best_metric": 81.14581337420962,
3
+ "best_model_checkpoint": "./whisper-small-ha-adam-v4/checkpoint-1000",
4
+ "epoch": 6.369426751592357,
5
  "eval_steps": 500,
6
+ "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
157
  "eval_wer": 88.17781184134891,
158
  "eval_wer_ortho": 90.4296875,
159
  "step": 500
160
+ },
161
+ {
162
+ "epoch": 3.343949044585987,
163
+ "grad_norm": 4.324921607971191,
164
+ "learning_rate": 5e-05,
165
+ "loss": 0.1136,
166
+ "step": 525
167
+ },
168
+ {
169
+ "epoch": 3.5031847133757963,
170
+ "grad_norm": 3.88926100730896,
171
+ "learning_rate": 5e-05,
172
+ "loss": 0.117,
173
+ "step": 550
174
+ },
175
+ {
176
+ "epoch": 3.662420382165605,
177
+ "grad_norm": 4.270689964294434,
178
+ "learning_rate": 5e-05,
179
+ "loss": 0.1093,
180
+ "step": 575
181
+ },
182
+ {
183
+ "epoch": 3.821656050955414,
184
+ "grad_norm": 5.100738048553467,
185
+ "learning_rate": 5e-05,
186
+ "loss": 0.1185,
187
+ "step": 600
188
+ },
189
+ {
190
+ "epoch": 3.980891719745223,
191
+ "grad_norm": 5.05330753326416,
192
+ "learning_rate": 5e-05,
193
+ "loss": 0.1377,
194
+ "step": 625
195
+ },
196
+ {
197
+ "epoch": 4.140127388535032,
198
+ "grad_norm": 3.5940771102905273,
199
+ "learning_rate": 5e-05,
200
+ "loss": 0.0823,
201
+ "step": 650
202
+ },
203
+ {
204
+ "epoch": 4.2993630573248405,
205
+ "grad_norm": 3.304124355316162,
206
+ "learning_rate": 5e-05,
207
+ "loss": 0.0763,
208
+ "step": 675
209
+ },
210
+ {
211
+ "epoch": 4.45859872611465,
212
+ "grad_norm": 3.3643264770507812,
213
+ "learning_rate": 5e-05,
214
+ "loss": 0.0758,
215
+ "step": 700
216
+ },
217
+ {
218
+ "epoch": 4.617834394904459,
219
+ "grad_norm": 3.0554709434509277,
220
+ "learning_rate": 5e-05,
221
+ "loss": 0.084,
222
+ "step": 725
223
+ },
224
+ {
225
+ "epoch": 4.777070063694268,
226
+ "grad_norm": 5.5209174156188965,
227
+ "learning_rate": 5e-05,
228
+ "loss": 0.0816,
229
+ "step": 750
230
+ },
231
+ {
232
+ "epoch": 4.936305732484076,
233
+ "grad_norm": 5.73225736618042,
234
+ "learning_rate": 5e-05,
235
+ "loss": 0.1064,
236
+ "step": 775
237
+ },
238
+ {
239
+ "epoch": 5.095541401273885,
240
+ "grad_norm": 2.312955379486084,
241
+ "learning_rate": 5e-05,
242
+ "loss": 0.0697,
243
+ "step": 800
244
+ },
245
+ {
246
+ "epoch": 5.254777070063694,
247
+ "grad_norm": 5.729402542114258,
248
+ "learning_rate": 5e-05,
249
+ "loss": 0.0639,
250
+ "step": 825
251
+ },
252
+ {
253
+ "epoch": 5.414012738853503,
254
+ "grad_norm": 3.784353733062744,
255
+ "learning_rate": 5e-05,
256
+ "loss": 0.0733,
257
+ "step": 850
258
+ },
259
+ {
260
+ "epoch": 5.573248407643312,
261
+ "grad_norm": 2.1651360988616943,
262
+ "learning_rate": 5e-05,
263
+ "loss": 0.0774,
264
+ "step": 875
265
+ },
266
+ {
267
+ "epoch": 5.732484076433121,
268
+ "grad_norm": 4.008174419403076,
269
+ "learning_rate": 5e-05,
270
+ "loss": 0.0653,
271
+ "step": 900
272
+ },
273
+ {
274
+ "epoch": 5.89171974522293,
275
+ "grad_norm": 4.1395978927612305,
276
+ "learning_rate": 5e-05,
277
+ "loss": 0.0892,
278
+ "step": 925
279
+ },
280
+ {
281
+ "epoch": 6.050955414012739,
282
+ "grad_norm": 3.262603998184204,
283
+ "learning_rate": 5e-05,
284
+ "loss": 0.0573,
285
+ "step": 950
286
+ },
287
+ {
288
+ "epoch": 6.210191082802548,
289
+ "grad_norm": 2.6815080642700195,
290
+ "learning_rate": 5e-05,
291
+ "loss": 0.0646,
292
+ "step": 975
293
+ },
294
+ {
295
+ "epoch": 6.369426751592357,
296
+ "grad_norm": 1.4305006265640259,
297
+ "learning_rate": 5e-05,
298
+ "loss": 0.0468,
299
+ "step": 1000
300
+ },
301
+ {
302
+ "epoch": 6.369426751592357,
303
+ "eval_loss": 1.959425926208496,
304
+ "eval_runtime": 259.4756,
305
+ "eval_samples_per_second": 2.544,
306
+ "eval_steps_per_second": 0.162,
307
+ "eval_wer": 81.14581337420962,
308
+ "eval_wer_ortho": 82.83203125,
309
+ "step": 1000
310
  }
311
  ],
312
  "logging_steps": 25,
 
326
  "attributes": {}
327
  }
328
  },
329
+ "total_flos": 4.61044035551232e+18,
330
  "train_batch_size": 16,
331
  "trial_name": null,
332
  "trial_params": null