{ "best_metric": null, "best_model_checkpoint": null, "epoch": 29.454746136865342, "global_step": 20000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.59, "learning_rate": 0.0002382, "loss": 6.1211, "step": 400 }, { "epoch": 0.59, "eval_loss": 3.2328364849090576, "eval_runtime": 546.0856, "eval_samples_per_second": 13.958, "eval_steps_per_second": 1.745, "eval_wer": 0.9991302123305194, "step": 400 }, { "epoch": 1.18, "learning_rate": 0.0002955158530447911, "loss": 2.304, "step": 800 }, { "epoch": 1.18, "eval_loss": 1.0820916891098022, "eval_runtime": 551.5198, "eval_samples_per_second": 13.82, "eval_steps_per_second": 1.728, "eval_wer": 0.8043745203376823, "step": 800 }, { "epoch": 1.77, "learning_rate": 0.0002894765978862607, "loss": 0.9673, "step": 1200 }, { "epoch": 1.77, "eval_loss": 0.7271230816841125, "eval_runtime": 547.9451, "eval_samples_per_second": 13.91, "eval_steps_per_second": 1.739, "eval_wer": 0.6917881811204911, "step": 1200 }, { "epoch": 2.36, "learning_rate": 0.0002834373427277302, "loss": 0.7607, "step": 1600 }, { "epoch": 2.36, "eval_loss": 0.6257076263427734, "eval_runtime": 545.1866, "eval_samples_per_second": 13.981, "eval_steps_per_second": 1.748, "eval_wer": 0.6329751854694295, "step": 1600 }, { "epoch": 2.94, "learning_rate": 0.00027739808756919976, "loss": 0.689, "step": 2000 }, { "epoch": 2.94, "eval_loss": 0.5595377683639526, "eval_runtime": 545.7121, "eval_samples_per_second": 13.967, "eval_steps_per_second": 1.746, "eval_wer": 0.6035303146584804, "step": 2000 }, { "epoch": 3.53, "learning_rate": 0.00027135883241066934, "loss": 0.5775, "step": 2400 }, { "epoch": 3.53, "eval_loss": 0.582660973072052, "eval_runtime": 547.0999, "eval_samples_per_second": 13.932, "eval_steps_per_second": 1.742, "eval_wer": 0.6055257099002302, "step": 2400 }, { "epoch": 4.12, "learning_rate": 0.00026531957725213886, "loss": 0.5621, "step": 2800 }, { "epoch": 4.12, "eval_loss": 0.5549562573432922, "eval_runtime": 546.696, "eval_samples_per_second": 13.942, "eval_steps_per_second": 1.743, "eval_wer": 0.5691992837042722, "step": 2800 }, { "epoch": 4.71, "learning_rate": 0.00025928032209360843, "loss": 0.5014, "step": 3200 }, { "epoch": 4.71, "eval_loss": 0.5490128993988037, "eval_runtime": 542.4177, "eval_samples_per_second": 14.052, "eval_steps_per_second": 1.757, "eval_wer": 0.5637503197748784, "step": 3200 }, { "epoch": 5.3, "learning_rate": 0.000253241066935078, "loss": 0.4781, "step": 3600 }, { "epoch": 5.3, "eval_loss": 0.5757995247840881, "eval_runtime": 543.8318, "eval_samples_per_second": 14.015, "eval_steps_per_second": 1.752, "eval_wer": 0.5655154771041186, "step": 3600 }, { "epoch": 5.89, "learning_rate": 0.0002472018117765476, "loss": 0.4499, "step": 4000 }, { "epoch": 5.89, "eval_loss": 0.5555837154388428, "eval_runtime": 545.7873, "eval_samples_per_second": 13.965, "eval_steps_per_second": 1.746, "eval_wer": 0.5522640061396776, "step": 4000 }, { "epoch": 6.48, "learning_rate": 0.0002411625566180171, "loss": 0.4095, "step": 4400 }, { "epoch": 6.48, "eval_loss": 0.5786208510398865, "eval_runtime": 543.989, "eval_samples_per_second": 14.011, "eval_steps_per_second": 1.752, "eval_wer": 0.5524942440521873, "step": 4400 }, { "epoch": 7.07, "learning_rate": 0.00023512330145948666, "loss": 0.4003, "step": 4800 }, { "epoch": 7.07, "eval_loss": 0.5860427021980286, "eval_runtime": 549.6883, "eval_samples_per_second": 13.866, "eval_steps_per_second": 1.734, "eval_wer": 0.5389613711946789, "step": 4800 }, { "epoch": 7.66, "learning_rate": 0.00022908404630095618, "loss": 0.3653, "step": 5200 }, { "epoch": 7.66, "eval_loss": 0.5734272003173828, "eval_runtime": 542.4405, "eval_samples_per_second": 14.051, "eval_steps_per_second": 1.757, "eval_wer": 0.5064466615502686, "step": 5200 }, { "epoch": 8.25, "learning_rate": 0.00022304479114242576, "loss": 0.3454, "step": 5600 }, { "epoch": 8.25, "eval_loss": 0.5864331126213074, "eval_runtime": 543.9245, "eval_samples_per_second": 14.013, "eval_steps_per_second": 1.752, "eval_wer": 0.4945510360706063, "step": 5600 }, { "epoch": 8.84, "learning_rate": 0.0002170055359838953, "loss": 0.3223, "step": 6000 }, { "epoch": 8.84, "eval_loss": 0.5884710550308228, "eval_runtime": 548.5513, "eval_samples_per_second": 13.895, "eval_steps_per_second": 1.737, "eval_wer": 0.500332565873625, "step": 6000 }, { "epoch": 9.43, "learning_rate": 0.00021096628082536487, "loss": 0.2897, "step": 6400 }, { "epoch": 9.43, "eval_loss": 0.6017025113105774, "eval_runtime": 545.222, "eval_samples_per_second": 13.98, "eval_steps_per_second": 1.748, "eval_wer": 0.49053466359682785, "step": 6400 }, { "epoch": 10.01, "learning_rate": 0.0002049270256668344, "loss": 0.289, "step": 6800 }, { "epoch": 10.01, "eval_loss": 0.63252192735672, "eval_runtime": 547.5169, "eval_samples_per_second": 13.921, "eval_steps_per_second": 1.741, "eval_wer": 0.4930928626247122, "step": 6800 }, { "epoch": 10.6, "learning_rate": 0.00019888777050830396, "loss": 0.2488, "step": 7200 }, { "epoch": 10.6, "eval_loss": 0.6799584627151489, "eval_runtime": 541.9978, "eval_samples_per_second": 14.063, "eval_steps_per_second": 1.758, "eval_wer": 0.49833717063187516, "step": 7200 }, { "epoch": 11.19, "learning_rate": 0.0001928485153497735, "loss": 0.2361, "step": 7600 }, { "epoch": 11.19, "eval_loss": 0.6552415490150452, "eval_runtime": 544.0722, "eval_samples_per_second": 14.009, "eval_steps_per_second": 1.752, "eval_wer": 0.5002046559222307, "step": 7600 }, { "epoch": 11.78, "learning_rate": 0.00018682435832913938, "loss": 0.2275, "step": 8000 }, { "epoch": 11.78, "eval_loss": 0.6828446984291077, "eval_runtime": 543.1333, "eval_samples_per_second": 14.033, "eval_steps_per_second": 1.755, "eval_wer": 0.4898183678690202, "step": 8000 }, { "epoch": 12.37, "learning_rate": 0.00018078510317060895, "loss": 0.2109, "step": 8400 }, { "epoch": 12.37, "eval_loss": 0.6952915787696838, "eval_runtime": 542.929, "eval_samples_per_second": 14.039, "eval_steps_per_second": 1.755, "eval_wer": 0.4861601432591456, "step": 8400 }, { "epoch": 12.96, "learning_rate": 0.0001747458480120785, "loss": 0.2061, "step": 8800 }, { "epoch": 12.96, "eval_loss": 0.6886131167411804, "eval_runtime": 544.2666, "eval_samples_per_second": 14.004, "eval_steps_per_second": 1.751, "eval_wer": 0.4692504476848299, "step": 8800 }, { "epoch": 13.55, "learning_rate": 0.00016870659285354804, "loss": 0.1874, "step": 9200 }, { "epoch": 13.55, "eval_loss": 0.7013294696807861, "eval_runtime": 543.7236, "eval_samples_per_second": 14.018, "eval_steps_per_second": 1.753, "eval_wer": 0.47367613200306985, "step": 9200 }, { "epoch": 14.14, "learning_rate": 0.00016266733769501759, "loss": 0.1824, "step": 9600 }, { "epoch": 14.14, "eval_loss": 0.7199532985687256, "eval_runtime": 545.3665, "eval_samples_per_second": 13.976, "eval_steps_per_second": 1.747, "eval_wer": 0.4730621642363776, "step": 9600 }, { "epoch": 14.73, "learning_rate": 0.00015662808253648716, "loss": 0.1773, "step": 10000 }, { "epoch": 14.73, "eval_loss": 0.6803578734397888, "eval_runtime": 541.6325, "eval_samples_per_second": 14.072, "eval_steps_per_second": 1.759, "eval_wer": 0.47045280122793554, "step": 10000 }, { "epoch": 15.32, "learning_rate": 0.00015058882737795668, "loss": 0.1663, "step": 10400 }, { "epoch": 15.32, "eval_loss": 0.6929047703742981, "eval_runtime": 543.7575, "eval_samples_per_second": 14.017, "eval_steps_per_second": 1.753, "eval_wer": 0.4615246866206191, "step": 10400 }, { "epoch": 15.91, "learning_rate": 0.00014454957221942625, "loss": 0.1529, "step": 10800 }, { "epoch": 15.91, "eval_loss": 0.7400447130203247, "eval_runtime": 541.8998, "eval_samples_per_second": 14.065, "eval_steps_per_second": 1.759, "eval_wer": 0.4675364543361474, "step": 10800 }, { "epoch": 16.49, "learning_rate": 0.00013851031706089582, "loss": 0.1406, "step": 11200 }, { "epoch": 16.49, "eval_loss": 0.7907389998435974, "eval_runtime": 543.7393, "eval_samples_per_second": 14.018, "eval_steps_per_second": 1.753, "eval_wer": 0.46472243540547453, "step": 11200 }, { "epoch": 17.08, "learning_rate": 0.0001324861600402617, "loss": 0.1376, "step": 11600 }, { "epoch": 17.08, "eval_loss": 0.8006933927536011, "eval_runtime": 543.2472, "eval_samples_per_second": 14.03, "eval_steps_per_second": 1.754, "eval_wer": 0.46689690457917626, "step": 11600 }, { "epoch": 17.67, "learning_rate": 0.00012644690488173123, "loss": 0.1273, "step": 12000 }, { "epoch": 17.67, "eval_loss": 0.7544116973876953, "eval_runtime": 544.2672, "eval_samples_per_second": 14.004, "eval_steps_per_second": 1.751, "eval_wer": 0.46403172166794576, "step": 12000 }, { "epoch": 18.26, "learning_rate": 0.00012040764972320079, "loss": 0.1252, "step": 12400 }, { "epoch": 18.26, "eval_loss": 0.7654944658279419, "eval_runtime": 548.3671, "eval_samples_per_second": 13.899, "eval_steps_per_second": 1.738, "eval_wer": 0.45349194167306217, "step": 12400 }, { "epoch": 18.85, "learning_rate": 0.00011436839456467034, "loss": 0.1226, "step": 12800 }, { "epoch": 18.85, "eval_loss": 0.7361114621162415, "eval_runtime": 546.1341, "eval_samples_per_second": 13.956, "eval_steps_per_second": 1.745, "eval_wer": 0.4549245331286774, "step": 12800 }, { "epoch": 19.44, "learning_rate": 0.0001083291394061399, "loss": 0.1165, "step": 13200 }, { "epoch": 19.44, "eval_loss": 0.8643974661827087, "eval_runtime": 543.4889, "eval_samples_per_second": 14.024, "eval_steps_per_second": 1.753, "eval_wer": 0.45622921463289845, "step": 13200 }, { "epoch": 20.03, "learning_rate": 0.00010230498238550579, "loss": 0.1119, "step": 13600 }, { "epoch": 20.03, "eval_loss": 0.8597950339317322, "eval_runtime": 545.2818, "eval_samples_per_second": 13.978, "eval_steps_per_second": 1.748, "eval_wer": 0.4632642619595805, "step": 13600 }, { "epoch": 20.62, "learning_rate": 9.626572722697534e-05, "loss": 0.1077, "step": 14000 }, { "epoch": 20.62, "eval_loss": 0.8093447089195251, "eval_runtime": 541.9895, "eval_samples_per_second": 14.063, "eval_steps_per_second": 1.758, "eval_wer": 0.453031465848043, "step": 14000 }, { "epoch": 21.21, "learning_rate": 9.02264720684449e-05, "loss": 0.102, "step": 14400 }, { "epoch": 21.21, "eval_loss": 0.8589721918106079, "eval_runtime": 548.1684, "eval_samples_per_second": 13.904, "eval_steps_per_second": 1.739, "eval_wer": 0.4514965464313124, "step": 14400 }, { "epoch": 21.8, "learning_rate": 8.418721690991444e-05, "loss": 0.0939, "step": 14800 }, { "epoch": 21.8, "eval_loss": 0.8175553679466248, "eval_runtime": 541.1705, "eval_samples_per_second": 14.084, "eval_steps_per_second": 1.761, "eval_wer": 0.4527500639549757, "step": 14800 }, { "epoch": 22.39, "learning_rate": 7.814796175138399e-05, "loss": 0.0898, "step": 15200 }, { "epoch": 22.39, "eval_loss": 0.8843649625778198, "eval_runtime": 546.2264, "eval_samples_per_second": 13.954, "eval_steps_per_second": 1.745, "eval_wer": 0.44740342798669736, "step": 15200 }, { "epoch": 22.97, "learning_rate": 7.210870659285354e-05, "loss": 0.0903, "step": 15600 }, { "epoch": 22.97, "eval_loss": 0.8875829577445984, "eval_runtime": 540.326, "eval_samples_per_second": 14.106, "eval_steps_per_second": 1.764, "eval_wer": 0.45231517012023537, "step": 15600 }, { "epoch": 23.56, "learning_rate": 6.606945143432309e-05, "loss": 0.0848, "step": 16000 }, { "epoch": 23.56, "eval_loss": 0.9255176782608032, "eval_runtime": 544.8055, "eval_samples_per_second": 13.99, "eval_steps_per_second": 1.749, "eval_wer": 0.44814530570478384, "step": 16000 }, { "epoch": 24.15, "learning_rate": 6.003019627579265e-05, "loss": 0.0822, "step": 16400 }, { "epoch": 24.15, "eval_loss": 0.9284627437591553, "eval_runtime": 544.7798, "eval_samples_per_second": 13.991, "eval_steps_per_second": 1.749, "eval_wer": 0.4471220260936301, "step": 16400 }, { "epoch": 24.74, "learning_rate": 5.39909411172622e-05, "loss": 0.0767, "step": 16800 }, { "epoch": 24.74, "eval_loss": 0.9410629868507385, "eval_runtime": 544.0954, "eval_samples_per_second": 14.009, "eval_steps_per_second": 1.752, "eval_wer": 0.4394218470196981, "step": 16800 }, { "epoch": 25.33, "learning_rate": 4.795168595873175e-05, "loss": 0.0735, "step": 17200 }, { "epoch": 25.33, "eval_loss": 0.9868486523628235, "eval_runtime": 546.6128, "eval_samples_per_second": 13.944, "eval_steps_per_second": 1.743, "eval_wer": 0.44195446405730365, "step": 17200 }, { "epoch": 25.92, "learning_rate": 4.1927528938097633e-05, "loss": 0.0728, "step": 17600 }, { "epoch": 25.92, "eval_loss": 0.938178539276123, "eval_runtime": 542.4257, "eval_samples_per_second": 14.052, "eval_steps_per_second": 1.757, "eval_wer": 0.4406753645433615, "step": 17600 }, { "epoch": 26.51, "learning_rate": 3.590337191746351e-05, "loss": 0.0673, "step": 18000 }, { "epoch": 26.51, "eval_loss": 0.9842382669448853, "eval_runtime": 544.7728, "eval_samples_per_second": 13.991, "eval_steps_per_second": 1.749, "eval_wer": 0.4360706062931696, "step": 18000 }, { "epoch": 27.1, "learning_rate": 2.9864116758933062e-05, "loss": 0.0683, "step": 18400 }, { "epoch": 27.1, "eval_loss": 0.9531042575836182, "eval_runtime": 545.0114, "eval_samples_per_second": 13.985, "eval_steps_per_second": 1.749, "eval_wer": 0.4396520849322077, "step": 18400 }, { "epoch": 27.69, "learning_rate": 2.3824861600402614e-05, "loss": 0.0631, "step": 18800 }, { "epoch": 27.69, "eval_loss": 0.9613842964172363, "eval_runtime": 542.3788, "eval_samples_per_second": 14.053, "eval_steps_per_second": 1.757, "eval_wer": 0.4402148887183423, "step": 18800 }, { "epoch": 28.28, "learning_rate": 1.7785606441872167e-05, "loss": 0.0625, "step": 19200 }, { "epoch": 28.28, "eval_loss": 0.9772672057151794, "eval_runtime": 545.7524, "eval_samples_per_second": 13.966, "eval_steps_per_second": 1.746, "eval_wer": 0.4349194167306216, "step": 19200 }, { "epoch": 28.87, "learning_rate": 1.174635128334172e-05, "loss": 0.0599, "step": 19600 }, { "epoch": 28.87, "eval_loss": 0.9910905361175537, "eval_runtime": 543.4098, "eval_samples_per_second": 14.026, "eval_steps_per_second": 1.754, "eval_wer": 0.4378613456126887, "step": 19600 }, { "epoch": 29.45, "learning_rate": 5.707096124811273e-06, "loss": 0.0576, "step": 20000 }, { "epoch": 29.45, "eval_loss": 0.9930649995803833, "eval_runtime": 544.5195, "eval_samples_per_second": 13.998, "eval_steps_per_second": 1.75, "eval_wer": 0.435354310565362, "step": 20000 } ], "max_steps": 20370, "num_train_epochs": 30, "total_flos": 8.26011084747878e+19, "trial_name": null, "trial_params": null }