yuweiiizz commited on
Commit
f288f46
1 Parent(s): b14cd8f

Training in progress, step 9000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e6272cdab34ad742ff5d8928b1a3b73208418c7bfce7d87a0db8c4712d83527d
3
  size 966995080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:356aa951308cefb9925541a7565a23ebcfe9ff3c00faddd518dc705f7380d87c
3
  size 966995080
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d7de0ef00f4d8d9f4fa9b0e18f25384399f28c54b520f62c3dd31eca12ff60e2
3
  size 1925064044
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af2d7b2162db187feb649c1155e77b58af89b8abb3d58f0381a5a7e9a473ce4f
3
  size 1925064044
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5bcd75decc8ec809bdd000c1a023eecd569d9a9775fe640822926fa2ab60021b
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8d5ec1a6108a45f66ba00113edd4b4b9f89042f06bef4dde01cea9a8d8b8ca0
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:64cb8f98c15d4c048d873b0e5f521cf693b6283121ee23f325cb6b6e1b684e58
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6ff39835f6fc0ab5432bbe426fb9e017df9b359be5fad96f2d6b3dde669dc72
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 47.53661784287617,
3
  "best_model_checkpoint": "./whisper-small-taiwanese/checkpoint-7000",
4
- "epoch": 3.2,
5
  "eval_steps": 1000,
6
- "global_step": 8000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2319,6 +2319,295 @@
2319
  "eval_samples_per_second": 2.27,
2320
  "eval_steps_per_second": 0.284,
2321
  "step": 8000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2322
  }
2323
  ],
2324
  "logging_steps": 25,
@@ -2326,7 +2615,7 @@
2326
  "num_input_tokens_seen": 0,
2327
  "num_train_epochs": 4,
2328
  "save_steps": 1000,
2329
- "total_flos": 3.693893124096e+19,
2330
  "train_batch_size": 8,
2331
  "trial_name": null,
2332
  "trial_params": null
 
1
  {
2
  "best_metric": 47.53661784287617,
3
  "best_model_checkpoint": "./whisper-small-taiwanese/checkpoint-7000",
4
+ "epoch": 3.6,
5
  "eval_steps": 1000,
6
+ "global_step": 9000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2319
  "eval_samples_per_second": 2.27,
2320
  "eval_steps_per_second": 0.284,
2321
  "step": 8000
2322
+ },
2323
+ {
2324
+ "epoch": 3.21,
2325
+ "grad_norm": 11.635475158691406,
2326
+ "learning_rate": 1.2037037037037037e-06,
2327
+ "loss": 0.3935,
2328
+ "step": 8025
2329
+ },
2330
+ {
2331
+ "epoch": 3.22,
2332
+ "grad_norm": 9.14508056640625,
2333
+ "learning_rate": 1.1728395061728396e-06,
2334
+ "loss": 0.3949,
2335
+ "step": 8050
2336
+ },
2337
+ {
2338
+ "epoch": 3.23,
2339
+ "grad_norm": 7.534052848815918,
2340
+ "learning_rate": 1.1419753086419754e-06,
2341
+ "loss": 0.462,
2342
+ "step": 8075
2343
+ },
2344
+ {
2345
+ "epoch": 3.24,
2346
+ "grad_norm": 10.633529663085938,
2347
+ "learning_rate": 1.111111111111111e-06,
2348
+ "loss": 0.4319,
2349
+ "step": 8100
2350
+ },
2351
+ {
2352
+ "epoch": 3.25,
2353
+ "grad_norm": 11.600831031799316,
2354
+ "learning_rate": 1.0802469135802469e-06,
2355
+ "loss": 0.4527,
2356
+ "step": 8125
2357
+ },
2358
+ {
2359
+ "epoch": 3.26,
2360
+ "grad_norm": 12.2794771194458,
2361
+ "learning_rate": 1.0493827160493827e-06,
2362
+ "loss": 0.4392,
2363
+ "step": 8150
2364
+ },
2365
+ {
2366
+ "epoch": 3.27,
2367
+ "grad_norm": 10.185335159301758,
2368
+ "learning_rate": 1.0185185185185185e-06,
2369
+ "loss": 0.4585,
2370
+ "step": 8175
2371
+ },
2372
+ {
2373
+ "epoch": 3.2800000000000002,
2374
+ "grad_norm": 11.049323081970215,
2375
+ "learning_rate": 9.876543209876544e-07,
2376
+ "loss": 0.4333,
2377
+ "step": 8200
2378
+ },
2379
+ {
2380
+ "epoch": 3.29,
2381
+ "grad_norm": 9.09365177154541,
2382
+ "learning_rate": 9.567901234567902e-07,
2383
+ "loss": 0.445,
2384
+ "step": 8225
2385
+ },
2386
+ {
2387
+ "epoch": 3.3,
2388
+ "grad_norm": 10.265097618103027,
2389
+ "learning_rate": 9.259259259259259e-07,
2390
+ "loss": 0.4384,
2391
+ "step": 8250
2392
+ },
2393
+ {
2394
+ "epoch": 3.31,
2395
+ "grad_norm": 11.15007495880127,
2396
+ "learning_rate": 8.950617283950618e-07,
2397
+ "loss": 0.4714,
2398
+ "step": 8275
2399
+ },
2400
+ {
2401
+ "epoch": 3.32,
2402
+ "grad_norm": 10.374354362487793,
2403
+ "learning_rate": 8.641975308641976e-07,
2404
+ "loss": 0.4482,
2405
+ "step": 8300
2406
+ },
2407
+ {
2408
+ "epoch": 3.33,
2409
+ "grad_norm": 9.22261905670166,
2410
+ "learning_rate": 8.333333333333333e-07,
2411
+ "loss": 0.4367,
2412
+ "step": 8325
2413
+ },
2414
+ {
2415
+ "epoch": 3.34,
2416
+ "grad_norm": 7.564458847045898,
2417
+ "learning_rate": 8.024691358024692e-07,
2418
+ "loss": 0.4144,
2419
+ "step": 8350
2420
+ },
2421
+ {
2422
+ "epoch": 3.35,
2423
+ "grad_norm": 10.494316101074219,
2424
+ "learning_rate": 7.71604938271605e-07,
2425
+ "loss": 0.4123,
2426
+ "step": 8375
2427
+ },
2428
+ {
2429
+ "epoch": 3.36,
2430
+ "grad_norm": 8.803318977355957,
2431
+ "learning_rate": 7.407407407407407e-07,
2432
+ "loss": 0.4231,
2433
+ "step": 8400
2434
+ },
2435
+ {
2436
+ "epoch": 3.37,
2437
+ "grad_norm": 9.848652839660645,
2438
+ "learning_rate": 7.098765432098766e-07,
2439
+ "loss": 0.4498,
2440
+ "step": 8425
2441
+ },
2442
+ {
2443
+ "epoch": 3.38,
2444
+ "grad_norm": 10.841163635253906,
2445
+ "learning_rate": 6.790123456790124e-07,
2446
+ "loss": 0.4634,
2447
+ "step": 8450
2448
+ },
2449
+ {
2450
+ "epoch": 3.39,
2451
+ "grad_norm": 8.382885932922363,
2452
+ "learning_rate": 6.481481481481481e-07,
2453
+ "loss": 0.4402,
2454
+ "step": 8475
2455
+ },
2456
+ {
2457
+ "epoch": 3.4,
2458
+ "grad_norm": 11.07520866394043,
2459
+ "learning_rate": 6.17283950617284e-07,
2460
+ "loss": 0.3839,
2461
+ "step": 8500
2462
+ },
2463
+ {
2464
+ "epoch": 3.41,
2465
+ "grad_norm": 9.271187782287598,
2466
+ "learning_rate": 5.864197530864198e-07,
2467
+ "loss": 0.4475,
2468
+ "step": 8525
2469
+ },
2470
+ {
2471
+ "epoch": 3.42,
2472
+ "grad_norm": 8.09450912475586,
2473
+ "learning_rate": 5.555555555555555e-07,
2474
+ "loss": 0.3911,
2475
+ "step": 8550
2476
+ },
2477
+ {
2478
+ "epoch": 3.43,
2479
+ "grad_norm": 9.9707612991333,
2480
+ "learning_rate": 5.246913580246914e-07,
2481
+ "loss": 0.4077,
2482
+ "step": 8575
2483
+ },
2484
+ {
2485
+ "epoch": 3.44,
2486
+ "grad_norm": 9.983931541442871,
2487
+ "learning_rate": 4.938271604938272e-07,
2488
+ "loss": 0.4045,
2489
+ "step": 8600
2490
+ },
2491
+ {
2492
+ "epoch": 3.45,
2493
+ "grad_norm": 10.254908561706543,
2494
+ "learning_rate": 4.6296296296296297e-07,
2495
+ "loss": 0.4406,
2496
+ "step": 8625
2497
+ },
2498
+ {
2499
+ "epoch": 3.46,
2500
+ "grad_norm": 12.151867866516113,
2501
+ "learning_rate": 4.320987654320988e-07,
2502
+ "loss": 0.4247,
2503
+ "step": 8650
2504
+ },
2505
+ {
2506
+ "epoch": 3.4699999999999998,
2507
+ "grad_norm": 10.943432807922363,
2508
+ "learning_rate": 4.012345679012346e-07,
2509
+ "loss": 0.3905,
2510
+ "step": 8675
2511
+ },
2512
+ {
2513
+ "epoch": 3.48,
2514
+ "grad_norm": 9.766261100769043,
2515
+ "learning_rate": 3.7037037037037036e-07,
2516
+ "loss": 0.4148,
2517
+ "step": 8700
2518
+ },
2519
+ {
2520
+ "epoch": 3.49,
2521
+ "grad_norm": 10.133684158325195,
2522
+ "learning_rate": 3.395061728395062e-07,
2523
+ "loss": 0.3992,
2524
+ "step": 8725
2525
+ },
2526
+ {
2527
+ "epoch": 3.5,
2528
+ "grad_norm": 9.618481636047363,
2529
+ "learning_rate": 3.08641975308642e-07,
2530
+ "loss": 0.4629,
2531
+ "step": 8750
2532
+ },
2533
+ {
2534
+ "epoch": 3.51,
2535
+ "grad_norm": 8.487075805664062,
2536
+ "learning_rate": 2.7777777777777776e-07,
2537
+ "loss": 0.4331,
2538
+ "step": 8775
2539
+ },
2540
+ {
2541
+ "epoch": 3.52,
2542
+ "grad_norm": 12.730545043945312,
2543
+ "learning_rate": 2.469135802469136e-07,
2544
+ "loss": 0.4387,
2545
+ "step": 8800
2546
+ },
2547
+ {
2548
+ "epoch": 3.5300000000000002,
2549
+ "grad_norm": 11.143440246582031,
2550
+ "learning_rate": 2.160493827160494e-07,
2551
+ "loss": 0.4487,
2552
+ "step": 8825
2553
+ },
2554
+ {
2555
+ "epoch": 3.54,
2556
+ "grad_norm": 10.672728538513184,
2557
+ "learning_rate": 1.8518518518518518e-07,
2558
+ "loss": 0.4346,
2559
+ "step": 8850
2560
+ },
2561
+ {
2562
+ "epoch": 3.55,
2563
+ "grad_norm": 13.340011596679688,
2564
+ "learning_rate": 1.54320987654321e-07,
2565
+ "loss": 0.4646,
2566
+ "step": 8875
2567
+ },
2568
+ {
2569
+ "epoch": 3.56,
2570
+ "grad_norm": 13.566219329833984,
2571
+ "learning_rate": 1.234567901234568e-07,
2572
+ "loss": 0.5128,
2573
+ "step": 8900
2574
+ },
2575
+ {
2576
+ "epoch": 3.57,
2577
+ "grad_norm": 9.667753219604492,
2578
+ "learning_rate": 9.259259259259259e-08,
2579
+ "loss": 0.4815,
2580
+ "step": 8925
2581
+ },
2582
+ {
2583
+ "epoch": 3.58,
2584
+ "grad_norm": 11.755305290222168,
2585
+ "learning_rate": 6.17283950617284e-08,
2586
+ "loss": 0.458,
2587
+ "step": 8950
2588
+ },
2589
+ {
2590
+ "epoch": 3.59,
2591
+ "grad_norm": 8.772011756896973,
2592
+ "learning_rate": 3.08641975308642e-08,
2593
+ "loss": 0.4237,
2594
+ "step": 8975
2595
+ },
2596
+ {
2597
+ "epoch": 3.6,
2598
+ "grad_norm": 9.530144691467285,
2599
+ "learning_rate": 0.0,
2600
+ "loss": 0.4248,
2601
+ "step": 9000
2602
+ },
2603
+ {
2604
+ "epoch": 3.6,
2605
+ "eval_cer": 50.3994673768309,
2606
+ "eval_loss": 0.9010892510414124,
2607
+ "eval_runtime": 1750.8735,
2608
+ "eval_samples_per_second": 2.248,
2609
+ "eval_steps_per_second": 0.281,
2610
+ "step": 9000
2611
  }
2612
  ],
2613
  "logging_steps": 25,
 
2615
  "num_input_tokens_seen": 0,
2616
  "num_train_epochs": 4,
2617
  "save_steps": 1000,
2618
+ "total_flos": 4.155629764608e+19,
2619
  "train_batch_size": 8,
2620
  "trial_name": null,
2621
  "trial_params": null