Training in progress, step 9000, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 966995080
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:356aa951308cefb9925541a7565a23ebcfe9ff3c00faddd518dc705f7380d87c
|
3 |
size 966995080
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1925064044
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:af2d7b2162db187feb649c1155e77b58af89b8abb3d58f0381a5a7e9a473ce4f
|
3 |
size 1925064044
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f8d5ec1a6108a45f66ba00113edd4b4b9f89042f06bef4dde01cea9a8d8b8ca0
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f6ff39835f6fc0ab5432bbe426fb9e017df9b359be5fad96f2d6b3dde669dc72
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 47.53661784287617,
|
3 |
"best_model_checkpoint": "./whisper-small-taiwanese/checkpoint-7000",
|
4 |
-
"epoch": 3.
|
5 |
"eval_steps": 1000,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -2319,6 +2319,295 @@
|
|
2319 |
"eval_samples_per_second": 2.27,
|
2320 |
"eval_steps_per_second": 0.284,
|
2321 |
"step": 8000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2322 |
}
|
2323 |
],
|
2324 |
"logging_steps": 25,
|
@@ -2326,7 +2615,7 @@
|
|
2326 |
"num_input_tokens_seen": 0,
|
2327 |
"num_train_epochs": 4,
|
2328 |
"save_steps": 1000,
|
2329 |
-
"total_flos":
|
2330 |
"train_batch_size": 8,
|
2331 |
"trial_name": null,
|
2332 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 47.53661784287617,
|
3 |
"best_model_checkpoint": "./whisper-small-taiwanese/checkpoint-7000",
|
4 |
+
"epoch": 3.6,
|
5 |
"eval_steps": 1000,
|
6 |
+
"global_step": 9000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
2319 |
"eval_samples_per_second": 2.27,
|
2320 |
"eval_steps_per_second": 0.284,
|
2321 |
"step": 8000
|
2322 |
+
},
|
2323 |
+
{
|
2324 |
+
"epoch": 3.21,
|
2325 |
+
"grad_norm": 11.635475158691406,
|
2326 |
+
"learning_rate": 1.2037037037037037e-06,
|
2327 |
+
"loss": 0.3935,
|
2328 |
+
"step": 8025
|
2329 |
+
},
|
2330 |
+
{
|
2331 |
+
"epoch": 3.22,
|
2332 |
+
"grad_norm": 9.14508056640625,
|
2333 |
+
"learning_rate": 1.1728395061728396e-06,
|
2334 |
+
"loss": 0.3949,
|
2335 |
+
"step": 8050
|
2336 |
+
},
|
2337 |
+
{
|
2338 |
+
"epoch": 3.23,
|
2339 |
+
"grad_norm": 7.534052848815918,
|
2340 |
+
"learning_rate": 1.1419753086419754e-06,
|
2341 |
+
"loss": 0.462,
|
2342 |
+
"step": 8075
|
2343 |
+
},
|
2344 |
+
{
|
2345 |
+
"epoch": 3.24,
|
2346 |
+
"grad_norm": 10.633529663085938,
|
2347 |
+
"learning_rate": 1.111111111111111e-06,
|
2348 |
+
"loss": 0.4319,
|
2349 |
+
"step": 8100
|
2350 |
+
},
|
2351 |
+
{
|
2352 |
+
"epoch": 3.25,
|
2353 |
+
"grad_norm": 11.600831031799316,
|
2354 |
+
"learning_rate": 1.0802469135802469e-06,
|
2355 |
+
"loss": 0.4527,
|
2356 |
+
"step": 8125
|
2357 |
+
},
|
2358 |
+
{
|
2359 |
+
"epoch": 3.26,
|
2360 |
+
"grad_norm": 12.2794771194458,
|
2361 |
+
"learning_rate": 1.0493827160493827e-06,
|
2362 |
+
"loss": 0.4392,
|
2363 |
+
"step": 8150
|
2364 |
+
},
|
2365 |
+
{
|
2366 |
+
"epoch": 3.27,
|
2367 |
+
"grad_norm": 10.185335159301758,
|
2368 |
+
"learning_rate": 1.0185185185185185e-06,
|
2369 |
+
"loss": 0.4585,
|
2370 |
+
"step": 8175
|
2371 |
+
},
|
2372 |
+
{
|
2373 |
+
"epoch": 3.2800000000000002,
|
2374 |
+
"grad_norm": 11.049323081970215,
|
2375 |
+
"learning_rate": 9.876543209876544e-07,
|
2376 |
+
"loss": 0.4333,
|
2377 |
+
"step": 8200
|
2378 |
+
},
|
2379 |
+
{
|
2380 |
+
"epoch": 3.29,
|
2381 |
+
"grad_norm": 9.09365177154541,
|
2382 |
+
"learning_rate": 9.567901234567902e-07,
|
2383 |
+
"loss": 0.445,
|
2384 |
+
"step": 8225
|
2385 |
+
},
|
2386 |
+
{
|
2387 |
+
"epoch": 3.3,
|
2388 |
+
"grad_norm": 10.265097618103027,
|
2389 |
+
"learning_rate": 9.259259259259259e-07,
|
2390 |
+
"loss": 0.4384,
|
2391 |
+
"step": 8250
|
2392 |
+
},
|
2393 |
+
{
|
2394 |
+
"epoch": 3.31,
|
2395 |
+
"grad_norm": 11.15007495880127,
|
2396 |
+
"learning_rate": 8.950617283950618e-07,
|
2397 |
+
"loss": 0.4714,
|
2398 |
+
"step": 8275
|
2399 |
+
},
|
2400 |
+
{
|
2401 |
+
"epoch": 3.32,
|
2402 |
+
"grad_norm": 10.374354362487793,
|
2403 |
+
"learning_rate": 8.641975308641976e-07,
|
2404 |
+
"loss": 0.4482,
|
2405 |
+
"step": 8300
|
2406 |
+
},
|
2407 |
+
{
|
2408 |
+
"epoch": 3.33,
|
2409 |
+
"grad_norm": 9.22261905670166,
|
2410 |
+
"learning_rate": 8.333333333333333e-07,
|
2411 |
+
"loss": 0.4367,
|
2412 |
+
"step": 8325
|
2413 |
+
},
|
2414 |
+
{
|
2415 |
+
"epoch": 3.34,
|
2416 |
+
"grad_norm": 7.564458847045898,
|
2417 |
+
"learning_rate": 8.024691358024692e-07,
|
2418 |
+
"loss": 0.4144,
|
2419 |
+
"step": 8350
|
2420 |
+
},
|
2421 |
+
{
|
2422 |
+
"epoch": 3.35,
|
2423 |
+
"grad_norm": 10.494316101074219,
|
2424 |
+
"learning_rate": 7.71604938271605e-07,
|
2425 |
+
"loss": 0.4123,
|
2426 |
+
"step": 8375
|
2427 |
+
},
|
2428 |
+
{
|
2429 |
+
"epoch": 3.36,
|
2430 |
+
"grad_norm": 8.803318977355957,
|
2431 |
+
"learning_rate": 7.407407407407407e-07,
|
2432 |
+
"loss": 0.4231,
|
2433 |
+
"step": 8400
|
2434 |
+
},
|
2435 |
+
{
|
2436 |
+
"epoch": 3.37,
|
2437 |
+
"grad_norm": 9.848652839660645,
|
2438 |
+
"learning_rate": 7.098765432098766e-07,
|
2439 |
+
"loss": 0.4498,
|
2440 |
+
"step": 8425
|
2441 |
+
},
|
2442 |
+
{
|
2443 |
+
"epoch": 3.38,
|
2444 |
+
"grad_norm": 10.841163635253906,
|
2445 |
+
"learning_rate": 6.790123456790124e-07,
|
2446 |
+
"loss": 0.4634,
|
2447 |
+
"step": 8450
|
2448 |
+
},
|
2449 |
+
{
|
2450 |
+
"epoch": 3.39,
|
2451 |
+
"grad_norm": 8.382885932922363,
|
2452 |
+
"learning_rate": 6.481481481481481e-07,
|
2453 |
+
"loss": 0.4402,
|
2454 |
+
"step": 8475
|
2455 |
+
},
|
2456 |
+
{
|
2457 |
+
"epoch": 3.4,
|
2458 |
+
"grad_norm": 11.07520866394043,
|
2459 |
+
"learning_rate": 6.17283950617284e-07,
|
2460 |
+
"loss": 0.3839,
|
2461 |
+
"step": 8500
|
2462 |
+
},
|
2463 |
+
{
|
2464 |
+
"epoch": 3.41,
|
2465 |
+
"grad_norm": 9.271187782287598,
|
2466 |
+
"learning_rate": 5.864197530864198e-07,
|
2467 |
+
"loss": 0.4475,
|
2468 |
+
"step": 8525
|
2469 |
+
},
|
2470 |
+
{
|
2471 |
+
"epoch": 3.42,
|
2472 |
+
"grad_norm": 8.09450912475586,
|
2473 |
+
"learning_rate": 5.555555555555555e-07,
|
2474 |
+
"loss": 0.3911,
|
2475 |
+
"step": 8550
|
2476 |
+
},
|
2477 |
+
{
|
2478 |
+
"epoch": 3.43,
|
2479 |
+
"grad_norm": 9.9707612991333,
|
2480 |
+
"learning_rate": 5.246913580246914e-07,
|
2481 |
+
"loss": 0.4077,
|
2482 |
+
"step": 8575
|
2483 |
+
},
|
2484 |
+
{
|
2485 |
+
"epoch": 3.44,
|
2486 |
+
"grad_norm": 9.983931541442871,
|
2487 |
+
"learning_rate": 4.938271604938272e-07,
|
2488 |
+
"loss": 0.4045,
|
2489 |
+
"step": 8600
|
2490 |
+
},
|
2491 |
+
{
|
2492 |
+
"epoch": 3.45,
|
2493 |
+
"grad_norm": 10.254908561706543,
|
2494 |
+
"learning_rate": 4.6296296296296297e-07,
|
2495 |
+
"loss": 0.4406,
|
2496 |
+
"step": 8625
|
2497 |
+
},
|
2498 |
+
{
|
2499 |
+
"epoch": 3.46,
|
2500 |
+
"grad_norm": 12.151867866516113,
|
2501 |
+
"learning_rate": 4.320987654320988e-07,
|
2502 |
+
"loss": 0.4247,
|
2503 |
+
"step": 8650
|
2504 |
+
},
|
2505 |
+
{
|
2506 |
+
"epoch": 3.4699999999999998,
|
2507 |
+
"grad_norm": 10.943432807922363,
|
2508 |
+
"learning_rate": 4.012345679012346e-07,
|
2509 |
+
"loss": 0.3905,
|
2510 |
+
"step": 8675
|
2511 |
+
},
|
2512 |
+
{
|
2513 |
+
"epoch": 3.48,
|
2514 |
+
"grad_norm": 9.766261100769043,
|
2515 |
+
"learning_rate": 3.7037037037037036e-07,
|
2516 |
+
"loss": 0.4148,
|
2517 |
+
"step": 8700
|
2518 |
+
},
|
2519 |
+
{
|
2520 |
+
"epoch": 3.49,
|
2521 |
+
"grad_norm": 10.133684158325195,
|
2522 |
+
"learning_rate": 3.395061728395062e-07,
|
2523 |
+
"loss": 0.3992,
|
2524 |
+
"step": 8725
|
2525 |
+
},
|
2526 |
+
{
|
2527 |
+
"epoch": 3.5,
|
2528 |
+
"grad_norm": 9.618481636047363,
|
2529 |
+
"learning_rate": 3.08641975308642e-07,
|
2530 |
+
"loss": 0.4629,
|
2531 |
+
"step": 8750
|
2532 |
+
},
|
2533 |
+
{
|
2534 |
+
"epoch": 3.51,
|
2535 |
+
"grad_norm": 8.487075805664062,
|
2536 |
+
"learning_rate": 2.7777777777777776e-07,
|
2537 |
+
"loss": 0.4331,
|
2538 |
+
"step": 8775
|
2539 |
+
},
|
2540 |
+
{
|
2541 |
+
"epoch": 3.52,
|
2542 |
+
"grad_norm": 12.730545043945312,
|
2543 |
+
"learning_rate": 2.469135802469136e-07,
|
2544 |
+
"loss": 0.4387,
|
2545 |
+
"step": 8800
|
2546 |
+
},
|
2547 |
+
{
|
2548 |
+
"epoch": 3.5300000000000002,
|
2549 |
+
"grad_norm": 11.143440246582031,
|
2550 |
+
"learning_rate": 2.160493827160494e-07,
|
2551 |
+
"loss": 0.4487,
|
2552 |
+
"step": 8825
|
2553 |
+
},
|
2554 |
+
{
|
2555 |
+
"epoch": 3.54,
|
2556 |
+
"grad_norm": 10.672728538513184,
|
2557 |
+
"learning_rate": 1.8518518518518518e-07,
|
2558 |
+
"loss": 0.4346,
|
2559 |
+
"step": 8850
|
2560 |
+
},
|
2561 |
+
{
|
2562 |
+
"epoch": 3.55,
|
2563 |
+
"grad_norm": 13.340011596679688,
|
2564 |
+
"learning_rate": 1.54320987654321e-07,
|
2565 |
+
"loss": 0.4646,
|
2566 |
+
"step": 8875
|
2567 |
+
},
|
2568 |
+
{
|
2569 |
+
"epoch": 3.56,
|
2570 |
+
"grad_norm": 13.566219329833984,
|
2571 |
+
"learning_rate": 1.234567901234568e-07,
|
2572 |
+
"loss": 0.5128,
|
2573 |
+
"step": 8900
|
2574 |
+
},
|
2575 |
+
{
|
2576 |
+
"epoch": 3.57,
|
2577 |
+
"grad_norm": 9.667753219604492,
|
2578 |
+
"learning_rate": 9.259259259259259e-08,
|
2579 |
+
"loss": 0.4815,
|
2580 |
+
"step": 8925
|
2581 |
+
},
|
2582 |
+
{
|
2583 |
+
"epoch": 3.58,
|
2584 |
+
"grad_norm": 11.755305290222168,
|
2585 |
+
"learning_rate": 6.17283950617284e-08,
|
2586 |
+
"loss": 0.458,
|
2587 |
+
"step": 8950
|
2588 |
+
},
|
2589 |
+
{
|
2590 |
+
"epoch": 3.59,
|
2591 |
+
"grad_norm": 8.772011756896973,
|
2592 |
+
"learning_rate": 3.08641975308642e-08,
|
2593 |
+
"loss": 0.4237,
|
2594 |
+
"step": 8975
|
2595 |
+
},
|
2596 |
+
{
|
2597 |
+
"epoch": 3.6,
|
2598 |
+
"grad_norm": 9.530144691467285,
|
2599 |
+
"learning_rate": 0.0,
|
2600 |
+
"loss": 0.4248,
|
2601 |
+
"step": 9000
|
2602 |
+
},
|
2603 |
+
{
|
2604 |
+
"epoch": 3.6,
|
2605 |
+
"eval_cer": 50.3994673768309,
|
2606 |
+
"eval_loss": 0.9010892510414124,
|
2607 |
+
"eval_runtime": 1750.8735,
|
2608 |
+
"eval_samples_per_second": 2.248,
|
2609 |
+
"eval_steps_per_second": 0.281,
|
2610 |
+
"step": 9000
|
2611 |
}
|
2612 |
],
|
2613 |
"logging_steps": 25,
|
|
|
2615 |
"num_input_tokens_seen": 0,
|
2616 |
"num_train_epochs": 4,
|
2617 |
"save_steps": 1000,
|
2618 |
+
"total_flos": 4.155629764608e+19,
|
2619 |
"train_batch_size": 8,
|
2620 |
"trial_name": null,
|
2621 |
"trial_params": null
|