|
{ |
|
"best_metric": 0.09535224735736847, |
|
"best_model_checkpoint": "XLS-R-300m-korean-ipa_v2/checkpoint-8000", |
|
"epoch": 12.778930566640064, |
|
"global_step": 8000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.9999999999999997e-05, |
|
"loss": 5.6413, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 5.9999999999999995e-05, |
|
"loss": 2.9849, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 8.999999999999999e-05, |
|
"loss": 2.6488, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00011999999999999999, |
|
"loss": 2.1382, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00015, |
|
"loss": 0.8032, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_cer": 0.28992062691770265, |
|
"eval_loss": 0.5041582584381104, |
|
"eval_runtime": 249.7666, |
|
"eval_samples_per_second": 8.916, |
|
"eval_steps_per_second": 1.117, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00017999999999999998, |
|
"loss": 0.4962, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.00020999999999999998, |
|
"loss": 0.4136, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.00023999999999999998, |
|
"loss": 0.3741, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.00027, |
|
"loss": 0.3477, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0003, |
|
"loss": 0.326, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_cer": 0.17689681499301343, |
|
"eval_loss": 0.26741623878479004, |
|
"eval_runtime": 208.1626, |
|
"eval_samples_per_second": 10.698, |
|
"eval_steps_per_second": 1.34, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.0002973958333333333, |
|
"loss": 0.3141, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.0002947916666666666, |
|
"loss": 0.3045, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 0.00029218749999999997, |
|
"loss": 0.2645, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 0.0002895833333333333, |
|
"loss": 0.2399, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 0.0002869791666666666, |
|
"loss": 0.2343, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_cer": 0.12242325588095478, |
|
"eval_loss": 0.18896125257015228, |
|
"eval_runtime": 208.4383, |
|
"eval_samples_per_second": 10.684, |
|
"eval_steps_per_second": 1.339, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 0.00028437499999999996, |
|
"loss": 0.2198, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 0.0002817708333333333, |
|
"loss": 0.2218, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 0.00027916666666666666, |
|
"loss": 0.2139, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 0.00027656249999999995, |
|
"loss": 0.2009, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 0.0002739583333333333, |
|
"loss": 0.1696, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"eval_cer": 0.10347311254843775, |
|
"eval_loss": 0.16067682206630707, |
|
"eval_runtime": 204.5024, |
|
"eval_samples_per_second": 10.89, |
|
"eval_steps_per_second": 1.364, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 0.00027135416666666665, |
|
"loss": 0.1707, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 0.00026875, |
|
"loss": 0.1663, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 0.0002661458333333333, |
|
"loss": 0.1577, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 0.00026354166666666664, |
|
"loss": 0.1598, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 0.00026093749999999994, |
|
"loss": 0.1608, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"eval_cer": 0.0870294876042582, |
|
"eval_loss": 0.14269110560417175, |
|
"eval_runtime": 200.8042, |
|
"eval_samples_per_second": 11.09, |
|
"eval_steps_per_second": 1.389, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 0.00025833333333333334, |
|
"loss": 0.1371, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 0.00025572916666666664, |
|
"loss": 0.1334, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 0.000253125, |
|
"loss": 0.1318, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 0.0002505208333333333, |
|
"loss": 0.1305, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 0.00024791666666666663, |
|
"loss": 0.1275, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"eval_cer": 0.07236491450467451, |
|
"eval_loss": 0.12700112164020538, |
|
"eval_runtime": 194.3447, |
|
"eval_samples_per_second": 11.459, |
|
"eval_steps_per_second": 1.436, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 0.0002453125, |
|
"loss": 0.1329, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 0.00024270833333333333, |
|
"loss": 0.1233, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 5.27, |
|
"learning_rate": 0.00024010416666666665, |
|
"loss": 0.1117, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 0.00023749999999999997, |
|
"loss": 0.1176, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"learning_rate": 0.00023489583333333332, |
|
"loss": 0.1107, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"eval_cer": 0.07061467321626644, |
|
"eval_loss": 0.12084876000881195, |
|
"eval_runtime": 194.5834, |
|
"eval_samples_per_second": 11.445, |
|
"eval_steps_per_second": 1.434, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"learning_rate": 0.00023229166666666664, |
|
"loss": 0.1075, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 5.91, |
|
"learning_rate": 0.0002296875, |
|
"loss": 0.1108, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"learning_rate": 0.00022708333333333331, |
|
"loss": 0.1066, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"learning_rate": 0.00022447916666666664, |
|
"loss": 0.0922, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"learning_rate": 0.00022187499999999999, |
|
"loss": 0.0935, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"eval_cer": 0.066206658119535, |
|
"eval_loss": 0.12421450763940811, |
|
"eval_runtime": 193.406, |
|
"eval_samples_per_second": 11.515, |
|
"eval_steps_per_second": 1.443, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"learning_rate": 0.0002192708333333333, |
|
"loss": 0.1012, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 6.71, |
|
"learning_rate": 0.00021666666666666666, |
|
"loss": 0.1015, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"learning_rate": 0.00021406249999999998, |
|
"loss": 0.1004, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 0.00021145833333333333, |
|
"loss": 0.0979, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 7.19, |
|
"learning_rate": 0.00020885416666666665, |
|
"loss": 0.0827, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 7.19, |
|
"eval_cer": 0.05754908598510494, |
|
"eval_loss": 0.1160559132695198, |
|
"eval_runtime": 196.9923, |
|
"eval_samples_per_second": 11.305, |
|
"eval_steps_per_second": 1.416, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 7.35, |
|
"learning_rate": 0.00020624999999999997, |
|
"loss": 0.0851, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"learning_rate": 0.00020364583333333332, |
|
"loss": 0.0842, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 7.67, |
|
"learning_rate": 0.00020104166666666664, |
|
"loss": 0.0846, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"learning_rate": 0.0001984375, |
|
"loss": 0.0869, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 7.99, |
|
"learning_rate": 0.00019583333333333331, |
|
"loss": 0.0885, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 7.99, |
|
"eval_cer": 0.053594837148331145, |
|
"eval_loss": 0.10113094747066498, |
|
"eval_runtime": 209.0359, |
|
"eval_samples_per_second": 10.654, |
|
"eval_steps_per_second": 1.335, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"learning_rate": 0.00019322916666666664, |
|
"loss": 0.0755, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 8.31, |
|
"learning_rate": 0.00019062499999999998, |
|
"loss": 0.0754, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 8.47, |
|
"learning_rate": 0.00018802083333333333, |
|
"loss": 0.0766, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 8.63, |
|
"learning_rate": 0.00018541666666666666, |
|
"loss": 0.0747, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 8.79, |
|
"learning_rate": 0.00018281249999999998, |
|
"loss": 0.0744, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 8.79, |
|
"eval_cer": 0.052132701421800945, |
|
"eval_loss": 0.10384104400873184, |
|
"eval_runtime": 213.8935, |
|
"eval_samples_per_second": 10.412, |
|
"eval_steps_per_second": 1.304, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 8.94, |
|
"learning_rate": 0.0001802083333333333, |
|
"loss": 0.076, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 9.11, |
|
"learning_rate": 0.00017760416666666665, |
|
"loss": 0.0723, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 9.26, |
|
"learning_rate": 0.000175, |
|
"loss": 0.0705, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 9.42, |
|
"learning_rate": 0.00017239583333333332, |
|
"loss": 0.0707, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 9.58, |
|
"learning_rate": 0.00016979166666666664, |
|
"loss": 0.0721, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 9.58, |
|
"eval_cer": 0.05004393609818637, |
|
"eval_loss": 0.10492419451475143, |
|
"eval_runtime": 214.0314, |
|
"eval_samples_per_second": 10.405, |
|
"eval_steps_per_second": 1.304, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 9.74, |
|
"learning_rate": 0.00016718749999999996, |
|
"loss": 0.0684, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"learning_rate": 0.00016458333333333334, |
|
"loss": 0.0652, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 10.06, |
|
"learning_rate": 0.00016197916666666666, |
|
"loss": 0.0656, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 10.22, |
|
"learning_rate": 0.00015937499999999998, |
|
"loss": 0.0631, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 10.38, |
|
"learning_rate": 0.0001567708333333333, |
|
"loss": 0.0646, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 10.38, |
|
"eval_cer": 0.048113628833604635, |
|
"eval_loss": 0.1038593202829361, |
|
"eval_runtime": 212.7231, |
|
"eval_samples_per_second": 10.469, |
|
"eval_steps_per_second": 1.312, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 10.54, |
|
"learning_rate": 0.00015416666666666663, |
|
"loss": 0.0648, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 10.7, |
|
"learning_rate": 0.0001515625, |
|
"loss": 0.0612, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 10.86, |
|
"learning_rate": 0.000148984375, |
|
"loss": 0.0617, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 11.02, |
|
"learning_rate": 0.00014638020833333332, |
|
"loss": 0.0614, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 11.18, |
|
"learning_rate": 0.00014377604166666665, |
|
"loss": 0.0548, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 11.18, |
|
"eval_cer": 0.04586640545095723, |
|
"eval_loss": 0.09844804555177689, |
|
"eval_runtime": 216.1335, |
|
"eval_samples_per_second": 10.304, |
|
"eval_steps_per_second": 1.291, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 11.34, |
|
"learning_rate": 0.000141171875, |
|
"loss": 0.0559, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 11.5, |
|
"learning_rate": 0.00013856770833333332, |
|
"loss": 0.0549, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 11.66, |
|
"learning_rate": 0.00013596354166666667, |
|
"loss": 0.0571, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 11.82, |
|
"learning_rate": 0.000133359375, |
|
"loss": 0.0589, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 11.98, |
|
"learning_rate": 0.00013075520833333334, |
|
"loss": 0.0537, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 11.98, |
|
"eval_cer": 0.04361918206830983, |
|
"eval_loss": 0.10360600054264069, |
|
"eval_runtime": 277.1197, |
|
"eval_samples_per_second": 8.036, |
|
"eval_steps_per_second": 1.007, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 12.14, |
|
"learning_rate": 0.00012815104166666666, |
|
"loss": 0.052, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 12.3, |
|
"learning_rate": 0.000125546875, |
|
"loss": 0.051, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 12.46, |
|
"learning_rate": 0.00012294270833333333, |
|
"loss": 0.0505, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 12.62, |
|
"learning_rate": 0.00012033854166666665, |
|
"loss": 0.0507, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 12.78, |
|
"learning_rate": 0.00011773437499999999, |
|
"loss": 0.0491, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 12.78, |
|
"eval_cer": 0.04207781731226321, |
|
"eval_loss": 0.09535224735736847, |
|
"eval_runtime": 213.4424, |
|
"eval_samples_per_second": 10.434, |
|
"eval_steps_per_second": 1.307, |
|
"step": 8000 |
|
} |
|
], |
|
"max_steps": 12520, |
|
"num_train_epochs": 20, |
|
"total_flos": 6.6069952716691825e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|