Seosnaps committed
Commit 50e5869
1 Parent(s): 4018b84

Training in progress, step 1000, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ece6b48235a9ef14ff0e00b277b6b394ae84dddf28e4707154cced0b42c63971
+oid sha256:5ab8c96d034a3c39633f65fbe492a96d5da8cdfe6fee8067059ab5aafb6a82ce
 size 966995080
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:21658050ca5e4cb30252f38794bee1237ad73dd0a3b8f773171a0b54127b38cf
+oid sha256:7bd64a777229dbef4bf82dff14d6cb53c591af1b557e7c7b175c65b32ceda7b5
 size 1925064044
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:efc19516f0bb6ebbb441d01c76bfbe40ffc86ac7def6317731979041e8f3b7ba
+oid sha256:d1e95b55e61f2a72e5e5389523fde8c9fb1a2902741a578a17a342a5d7a4df5d
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6b50b4cb0394cc440ebb08a80906354e23421bb3a354d8e7082be158c31d13dc
+oid sha256:db3c07fb5d0875fffe56d0b893055e00ce55f7141ff11534660111c35e822163
 size 1064
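
Each of the four checkpoint files above is tracked with Git LFS, so the diff only touches the three-line pointer: a version line, the oid sha256 of the stored blob, and its size in bytes. Only the content hashes change here; the recorded sizes are identical before and after. As a rough illustration (not code from this repository; the paths are placeholders), such a pointer can be checked against a locally downloaded blob with the Python standard library:

import hashlib
import os

def parse_lfs_pointer(pointer_path):
    # Read a Git LFS pointer file into a dict of its "key value" fields.
    fields = {}
    with open(pointer_path, "r", encoding="utf-8") as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

def verify_lfs_object(pointer_path, blob_path):
    # Compare blob_path against the oid and size recorded in the pointer.
    fields = parse_lfs_pointer(pointer_path)
    expected_hash = fields["oid"].split(":", 1)[1]   # "sha256:<hex>" -> "<hex>"
    expected_size = int(fields["size"])

    digest = hashlib.sha256()
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
            digest.update(chunk)

    return (digest.hexdigest() == expected_hash
            and os.path.getsize(blob_path) == expected_size)

# Hypothetical usage:
# verify_lfs_object("model.safetensors.pointer", "last-checkpoint/model.safetensors")
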
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 99.84375,
-  "best_model_checkpoint": "./whisper-small-ha-v9/checkpoint-500",
-  "epoch": 3.1847133757961785,
+  "best_metric": 98.0078125,
+  "best_model_checkpoint": "./whisper-small-ha-v9/checkpoint-1000",
+  "epoch": 6.369426751592357,
   "eval_steps": 500,
-  "global_step": 500,
+  "global_step": 1000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -157,6 +157,156 @@
       "eval_wer": 99.84375,
       "eval_wer_ortho": 99.55930254838091,
       "step": 500
+    },
+    {
+      "epoch": 3.343949044585987,
+      "grad_norm": 7.7201032638549805,
+      "learning_rate": 0.0005,
+      "loss": 1.7432,
+      "step": 525
+    },
+    {
+      "epoch": 3.5031847133757963,
+      "grad_norm": 7.00828742980957,
+      "learning_rate": 0.0005,
+      "loss": 1.8165,
+      "step": 550
+    },
+    {
+      "epoch": 3.662420382165605,
+      "grad_norm": 7.801667213439941,
+      "learning_rate": 0.0005,
+      "loss": 1.8769,
+      "step": 575
+    },
+    {
+      "epoch": 3.821656050955414,
+      "grad_norm": 6.696052074432373,
+      "learning_rate": 0.0005,
+      "loss": 1.9189,
+      "step": 600
+    },
+    {
+      "epoch": 3.980891719745223,
+      "grad_norm": 7.849315643310547,
+      "learning_rate": 0.0005,
+      "loss": 1.9248,
+      "step": 625
+    },
+    {
+      "epoch": 4.140127388535032,
+      "grad_norm": 6.837538719177246,
+      "learning_rate": 0.0005,
+      "loss": 1.3935,
+      "step": 650
+    },
+    {
+      "epoch": 4.2993630573248405,
+      "grad_norm": 6.533350944519043,
+      "learning_rate": 0.0005,
+      "loss": 1.3859,
+      "step": 675
+    },
+    {
+      "epoch": 4.45859872611465,
+      "grad_norm": 7.38162899017334,
+      "learning_rate": 0.0005,
+      "loss": 1.4676,
+      "step": 700
+    },
+    {
+      "epoch": 4.617834394904459,
+      "grad_norm": 7.072102069854736,
+      "learning_rate": 0.0005,
+      "loss": 1.5653,
+      "step": 725
+    },
+    {
+      "epoch": 4.777070063694268,
+      "grad_norm": 7.383370399475098,
+      "learning_rate": 0.0005,
+      "loss": 1.6075,
+      "step": 750
+    },
+    {
+      "epoch": 4.936305732484076,
+      "grad_norm": 6.835177898406982,
+      "learning_rate": 0.0005,
+      "loss": 1.6546,
+      "step": 775
+    },
+    {
+      "epoch": 5.095541401273885,
+      "grad_norm": 6.938526153564453,
+      "learning_rate": 0.0005,
+      "loss": 1.3298,
+      "step": 800
+    },
+    {
+      "epoch": 5.254777070063694,
+      "grad_norm": 7.478129863739014,
+      "learning_rate": 0.0005,
+      "loss": 1.2336,
+      "step": 825
+    },
+    {
+      "epoch": 5.414012738853503,
+      "grad_norm": 6.950467109680176,
+      "learning_rate": 0.0005,
+      "loss": 1.2652,
+      "step": 850
+    },
+    {
+      "epoch": 5.573248407643312,
+      "grad_norm": 8.092499732971191,
+      "learning_rate": 0.0005,
+      "loss": 1.4009,
+      "step": 875
+    },
+    {
+      "epoch": 5.732484076433121,
+      "grad_norm": 6.488431930541992,
+      "learning_rate": 0.0005,
+      "loss": 1.3538,
+      "step": 900
+    },
+    {
+      "epoch": 5.89171974522293,
+      "grad_norm": 6.798085689544678,
+      "learning_rate": 0.0005,
+      "loss": 1.4269,
+      "step": 925
+    },
+    {
+      "epoch": 6.050955414012739,
+      "grad_norm": 6.3589043617248535,
+      "learning_rate": 0.0005,
+      "loss": 1.2951,
+      "step": 950
+    },
+    {
+      "epoch": 6.210191082802548,
+      "grad_norm": 6.020321369171143,
+      "learning_rate": 0.0005,
+      "loss": 1.0916,
+      "step": 975
+    },
+    {
+      "epoch": 6.369426751592357,
+      "grad_norm": 6.381227493286133,
+      "learning_rate": 0.0005,
+      "loss": 1.1623,
+      "step": 1000
+    },
+    {
+      "epoch": 6.369426751592357,
+      "eval_loss": 4.484735488891602,
+      "eval_runtime": 243.1604,
+      "eval_samples_per_second": 2.714,
+      "eval_steps_per_second": 0.173,
+      "eval_wer": 98.0078125,
+      "eval_wer_ortho": 97.75819122437248,
+      "step": 1000
     }
   ],
   "logging_steps": 25,
@@ -176,7 +326,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.30522017775616e+18,
+  "total_flos": 4.61044035551232e+18,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null