{ "best_metric": 2.2782892974889872, "best_model_checkpoint": "./whisper-medium_new_data/checkpoint-8000", "epoch": 3.261312678353037, "eval_steps": 1000, "global_step": 8000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.010191602119853241, "grad_norm": 29.000947952270508, "learning_rate": 2.1000000000000003e-07, "loss": 3.777, "step": 25 }, { "epoch": 0.020383204239706482, "grad_norm": 16.35529136657715, "learning_rate": 4.6000000000000004e-07, "loss": 3.379, "step": 50 }, { "epoch": 0.030574806359559722, "grad_norm": 11.864029884338379, "learning_rate": 7.1e-07, "loss": 2.7049, "step": 75 }, { "epoch": 0.040766408479412965, "grad_norm": 9.735739707946777, "learning_rate": 9.600000000000001e-07, "loss": 2.1205, "step": 100 }, { "epoch": 0.050958010599266204, "grad_norm": 7.762232780456543, "learning_rate": 1.21e-06, "loss": 1.731, "step": 125 }, { "epoch": 0.061149612719119444, "grad_norm": 8.16604995727539, "learning_rate": 1.46e-06, "loss": 1.4442, "step": 150 }, { "epoch": 0.07134121483897268, "grad_norm": 7.53511905670166, "learning_rate": 1.7100000000000004e-06, "loss": 1.244, "step": 175 }, { "epoch": 0.08153281695882593, "grad_norm": 7.6938557624816895, "learning_rate": 1.9600000000000003e-06, "loss": 1.0535, "step": 200 }, { "epoch": 0.09172441907867916, "grad_norm": 7.2217912673950195, "learning_rate": 2.21e-06, "loss": 0.9467, "step": 225 }, { "epoch": 0.10191602119853241, "grad_norm": 6.709696292877197, "learning_rate": 2.46e-06, "loss": 0.8746, "step": 250 }, { "epoch": 0.11210762331838565, "grad_norm": 6.30186653137207, "learning_rate": 2.7100000000000003e-06, "loss": 0.7813, "step": 275 }, { "epoch": 0.12229922543823889, "grad_norm": 7.001236915588379, "learning_rate": 2.96e-06, "loss": 0.7312, "step": 300 }, { "epoch": 0.13249082755809213, "grad_norm": 6.0768232345581055, "learning_rate": 3.21e-06, "loss": 0.6602, "step": 325 }, { "epoch": 0.14268242967794537, "grad_norm": 6.0217790603637695, "learning_rate": 3.46e-06, "loss": 0.6294, "step": 350 }, { "epoch": 0.15287403179779863, "grad_norm": 6.146008491516113, "learning_rate": 3.7100000000000005e-06, "loss": 0.6184, "step": 375 }, { "epoch": 0.16306563391765186, "grad_norm": 5.988298416137695, "learning_rate": 3.96e-06, "loss": 0.5581, "step": 400 }, { "epoch": 0.1732572360375051, "grad_norm": 6.24209451675415, "learning_rate": 4.21e-06, "loss": 0.526, "step": 425 }, { "epoch": 0.18344883815735832, "grad_norm": 5.728849411010742, "learning_rate": 4.4600000000000005e-06, "loss": 0.4973, "step": 450 }, { "epoch": 0.19364044027721158, "grad_norm": 5.146097660064697, "learning_rate": 4.71e-06, "loss": 0.4877, "step": 475 }, { "epoch": 0.20383204239706482, "grad_norm": 5.125807285308838, "learning_rate": 4.960000000000001e-06, "loss": 0.4718, "step": 500 }, { "epoch": 0.21402364451691805, "grad_norm": 5.3624653816223145, "learning_rate": 5.210000000000001e-06, "loss": 0.455, "step": 525 }, { "epoch": 0.2242152466367713, "grad_norm": 5.1377854347229, "learning_rate": 5.460000000000001e-06, "loss": 0.4426, "step": 550 }, { "epoch": 0.23440684875662454, "grad_norm": 5.11069393157959, "learning_rate": 5.71e-06, "loss": 0.4176, "step": 575 }, { "epoch": 0.24459845087647777, "grad_norm": 5.100331783294678, "learning_rate": 5.9600000000000005e-06, "loss": 0.4044, "step": 600 }, { "epoch": 0.254790052996331, "grad_norm": 5.771068572998047, "learning_rate": 6.210000000000001e-06, "loss": 0.3994, "step": 625 }, { "epoch": 0.26498165511618427, "grad_norm": 5.4431986808776855, "learning_rate": 6.460000000000001e-06, "loss": 0.366, "step": 650 }, { "epoch": 0.2751732572360375, "grad_norm": 5.490501880645752, "learning_rate": 6.710000000000001e-06, "loss": 0.3742, "step": 675 }, { "epoch": 0.28536485935589073, "grad_norm": 5.428119659423828, "learning_rate": 6.96e-06, "loss": 0.3577, "step": 700 }, { "epoch": 0.295556461475744, "grad_norm": 4.746588230133057, "learning_rate": 7.2100000000000004e-06, "loss": 0.3465, "step": 725 }, { "epoch": 0.30574806359559725, "grad_norm": 4.743890762329102, "learning_rate": 7.4600000000000006e-06, "loss": 0.3226, "step": 750 }, { "epoch": 0.31593966571545046, "grad_norm": 5.242619037628174, "learning_rate": 7.71e-06, "loss": 0.3185, "step": 775 }, { "epoch": 0.3261312678353037, "grad_norm": 4.178338050842285, "learning_rate": 7.960000000000002e-06, "loss": 0.32, "step": 800 }, { "epoch": 0.336322869955157, "grad_norm": 4.850783824920654, "learning_rate": 8.210000000000001e-06, "loss": 0.327, "step": 825 }, { "epoch": 0.3465144720750102, "grad_norm": 5.147852897644043, "learning_rate": 8.46e-06, "loss": 0.3334, "step": 850 }, { "epoch": 0.35670607419486344, "grad_norm": 4.720470905303955, "learning_rate": 8.710000000000001e-06, "loss": 0.2821, "step": 875 }, { "epoch": 0.36689767631471665, "grad_norm": 4.410367965698242, "learning_rate": 8.96e-06, "loss": 0.2894, "step": 900 }, { "epoch": 0.3770892784345699, "grad_norm": 4.329741477966309, "learning_rate": 9.210000000000002e-06, "loss": 0.2792, "step": 925 }, { "epoch": 0.38728088055442317, "grad_norm": 4.4180803298950195, "learning_rate": 9.460000000000001e-06, "loss": 0.2842, "step": 950 }, { "epoch": 0.3974724826742764, "grad_norm": 4.213972568511963, "learning_rate": 9.71e-06, "loss": 0.2854, "step": 975 }, { "epoch": 0.40766408479412963, "grad_norm": 3.454677104949951, "learning_rate": 9.960000000000001e-06, "loss": 0.2733, "step": 1000 }, { "epoch": 0.40766408479412963, "eval_loss": 0.2585422396659851, "eval_runtime": 2430.2946, "eval_samples_per_second": 4.195, "eval_steps_per_second": 0.131, "eval_wer": 32.5923736874671, "step": 1000 }, { "epoch": 0.4178556869139829, "grad_norm": 3.999333620071411, "learning_rate": 9.970000000000001e-06, "loss": 0.2649, "step": 1025 }, { "epoch": 0.4280472890338361, "grad_norm": 3.658658504486084, "learning_rate": 9.934285714285715e-06, "loss": 0.2494, "step": 1050 }, { "epoch": 0.43823889115368936, "grad_norm": 4.042171955108643, "learning_rate": 9.89857142857143e-06, "loss": 0.2626, "step": 1075 }, { "epoch": 0.4484304932735426, "grad_norm": 4.251903057098389, "learning_rate": 9.862857142857144e-06, "loss": 0.2457, "step": 1100 }, { "epoch": 0.4586220953933958, "grad_norm": 4.49993896484375, "learning_rate": 9.827142857142859e-06, "loss": 0.2598, "step": 1125 }, { "epoch": 0.4688136975132491, "grad_norm": 4.043424129486084, "learning_rate": 9.791428571428571e-06, "loss": 0.2447, "step": 1150 }, { "epoch": 0.47900529963310234, "grad_norm": 3.787559747695923, "learning_rate": 9.755714285714286e-06, "loss": 0.2328, "step": 1175 }, { "epoch": 0.48919690175295555, "grad_norm": 3.875075340270996, "learning_rate": 9.72e-06, "loss": 0.2359, "step": 1200 }, { "epoch": 0.4993885038728088, "grad_norm": 4.148637294769287, "learning_rate": 9.684285714285715e-06, "loss": 0.2234, "step": 1225 }, { "epoch": 0.509580105992662, "grad_norm": 3.9952499866485596, "learning_rate": 9.648571428571429e-06, "loss": 0.225, "step": 1250 }, { "epoch": 0.5197717081125153, "grad_norm": 3.0705387592315674, "learning_rate": 9.612857142857144e-06, "loss": 0.2183, "step": 1275 }, { "epoch": 0.5299633102323685, "grad_norm": 3.5826079845428467, "learning_rate": 9.577142857142858e-06, "loss": 0.2125, "step": 1300 }, { "epoch": 0.5401549123522218, "grad_norm": 3.478602170944214, "learning_rate": 9.541428571428572e-06, "loss": 0.2067, "step": 1325 }, { "epoch": 0.550346514472075, "grad_norm": 3.646801233291626, "learning_rate": 9.505714285714287e-06, "loss": 0.2132, "step": 1350 }, { "epoch": 0.5605381165919282, "grad_norm": 4.240411758422852, "learning_rate": 9.47e-06, "loss": 0.2077, "step": 1375 }, { "epoch": 0.5707297187117815, "grad_norm": 3.531076669692993, "learning_rate": 9.434285714285714e-06, "loss": 0.1978, "step": 1400 }, { "epoch": 0.5809213208316347, "grad_norm": 4.578576564788818, "learning_rate": 9.39857142857143e-06, "loss": 0.1976, "step": 1425 }, { "epoch": 0.591112922951488, "grad_norm": 3.237501382827759, "learning_rate": 9.362857142857143e-06, "loss": 0.1829, "step": 1450 }, { "epoch": 0.6013045250713412, "grad_norm": 3.9498815536499023, "learning_rate": 9.327142857142857e-06, "loss": 0.1926, "step": 1475 }, { "epoch": 0.6114961271911945, "grad_norm": 3.386265277862549, "learning_rate": 9.291428571428572e-06, "loss": 0.1829, "step": 1500 }, { "epoch": 0.6216877293110477, "grad_norm": 3.7739858627319336, "learning_rate": 9.255714285714286e-06, "loss": 0.1807, "step": 1525 }, { "epoch": 0.6318793314309009, "grad_norm": 2.9203953742980957, "learning_rate": 9.220000000000002e-06, "loss": 0.1816, "step": 1550 }, { "epoch": 0.6420709335507542, "grad_norm": 3.465407371520996, "learning_rate": 9.184285714285715e-06, "loss": 0.1739, "step": 1575 }, { "epoch": 0.6522625356706074, "grad_norm": 3.1187219619750977, "learning_rate": 9.148571428571429e-06, "loss": 0.177, "step": 1600 }, { "epoch": 0.6624541377904607, "grad_norm": 3.4814960956573486, "learning_rate": 9.112857142857142e-06, "loss": 0.1718, "step": 1625 }, { "epoch": 0.672645739910314, "grad_norm": 3.3516340255737305, "learning_rate": 9.077142857142858e-06, "loss": 0.1682, "step": 1650 }, { "epoch": 0.6828373420301671, "grad_norm": 3.460319757461548, "learning_rate": 9.041428571428572e-06, "loss": 0.1645, "step": 1675 }, { "epoch": 0.6930289441500204, "grad_norm": 2.6608901023864746, "learning_rate": 9.005714285714287e-06, "loss": 0.1721, "step": 1700 }, { "epoch": 0.7032205462698736, "grad_norm": 3.2124173641204834, "learning_rate": 8.97e-06, "loss": 0.1647, "step": 1725 }, { "epoch": 0.7134121483897269, "grad_norm": 3.007030725479126, "learning_rate": 8.934285714285716e-06, "loss": 0.163, "step": 1750 }, { "epoch": 0.7236037505095801, "grad_norm": 2.7804579734802246, "learning_rate": 8.89857142857143e-06, "loss": 0.1732, "step": 1775 }, { "epoch": 0.7337953526294333, "grad_norm": 3.615187644958496, "learning_rate": 8.862857142857143e-06, "loss": 0.1578, "step": 1800 }, { "epoch": 0.7439869547492866, "grad_norm": 2.8256614208221436, "learning_rate": 8.827142857142857e-06, "loss": 0.1553, "step": 1825 }, { "epoch": 0.7541785568691398, "grad_norm": 2.9422616958618164, "learning_rate": 8.791428571428572e-06, "loss": 0.1483, "step": 1850 }, { "epoch": 0.7643701589889931, "grad_norm": 2.9863955974578857, "learning_rate": 8.755714285714286e-06, "loss": 0.1446, "step": 1875 }, { "epoch": 0.7745617611088463, "grad_norm": 3.2235851287841797, "learning_rate": 8.720000000000001e-06, "loss": 0.1488, "step": 1900 }, { "epoch": 0.7847533632286996, "grad_norm": 3.379194974899292, "learning_rate": 8.684285714285715e-06, "loss": 0.1433, "step": 1925 }, { "epoch": 0.7949449653485527, "grad_norm": 3.080751895904541, "learning_rate": 8.64857142857143e-06, "loss": 0.1428, "step": 1950 }, { "epoch": 0.805136567468406, "grad_norm": 2.6511690616607666, "learning_rate": 8.612857142857144e-06, "loss": 0.1351, "step": 1975 }, { "epoch": 0.8153281695882593, "grad_norm": 3.0043816566467285, "learning_rate": 8.577142857142858e-06, "loss": 0.1527, "step": 2000 }, { "epoch": 0.8153281695882593, "eval_loss": 0.12457678467035294, "eval_runtime": 2416.9884, "eval_samples_per_second": 4.218, "eval_steps_per_second": 0.132, "eval_wer": 16.723770120886936, "step": 2000 }, { "epoch": 0.8255197717081125, "grad_norm": 2.428893804550171, "learning_rate": 8.541428571428571e-06, "loss": 0.1329, "step": 2025 }, { "epoch": 0.8357113738279658, "grad_norm": 2.5849223136901855, "learning_rate": 8.505714285714287e-06, "loss": 0.1355, "step": 2050 }, { "epoch": 0.845902975947819, "grad_norm": 2.7230465412139893, "learning_rate": 8.47e-06, "loss": 0.1409, "step": 2075 }, { "epoch": 0.8560945780676722, "grad_norm": 2.889012336730957, "learning_rate": 8.434285714285716e-06, "loss": 0.1339, "step": 2100 }, { "epoch": 0.8662861801875255, "grad_norm": 2.4034271240234375, "learning_rate": 8.39857142857143e-06, "loss": 0.1397, "step": 2125 }, { "epoch": 0.8764777823073787, "grad_norm": 2.2975339889526367, "learning_rate": 8.362857142857143e-06, "loss": 0.1254, "step": 2150 }, { "epoch": 0.886669384427232, "grad_norm": 2.7864370346069336, "learning_rate": 8.327142857142858e-06, "loss": 0.1298, "step": 2175 }, { "epoch": 0.8968609865470852, "grad_norm": 3.3211889266967773, "learning_rate": 8.291428571428572e-06, "loss": 0.1195, "step": 2200 }, { "epoch": 0.9070525886669385, "grad_norm": 2.9740707874298096, "learning_rate": 8.255714285714287e-06, "loss": 0.1236, "step": 2225 }, { "epoch": 0.9172441907867916, "grad_norm": 2.8324942588806152, "learning_rate": 8.220000000000001e-06, "loss": 0.1256, "step": 2250 }, { "epoch": 0.9274357929066449, "grad_norm": 2.7244651317596436, "learning_rate": 8.184285714285715e-06, "loss": 0.1232, "step": 2275 }, { "epoch": 0.9376273950264982, "grad_norm": 2.6477315425872803, "learning_rate": 8.148571428571428e-06, "loss": 0.1194, "step": 2300 }, { "epoch": 0.9478189971463514, "grad_norm": 2.737881660461426, "learning_rate": 8.112857142857144e-06, "loss": 0.1184, "step": 2325 }, { "epoch": 0.9580105992662047, "grad_norm": 3.096505641937256, "learning_rate": 8.077142857142857e-06, "loss": 0.1316, "step": 2350 }, { "epoch": 0.9682022013860578, "grad_norm": 2.7507877349853516, "learning_rate": 8.041428571428573e-06, "loss": 0.109, "step": 2375 }, { "epoch": 0.9783938035059111, "grad_norm": 2.7306649684906006, "learning_rate": 8.005714285714286e-06, "loss": 0.1263, "step": 2400 }, { "epoch": 0.9885854056257644, "grad_norm": 3.1028060913085938, "learning_rate": 7.970000000000002e-06, "loss": 0.1135, "step": 2425 }, { "epoch": 0.9987770077456176, "grad_norm": 2.456820011138916, "learning_rate": 7.934285714285715e-06, "loss": 0.1192, "step": 2450 }, { "epoch": 1.0089686098654709, "grad_norm": 2.0729124546051025, "learning_rate": 7.898571428571429e-06, "loss": 0.0789, "step": 2475 }, { "epoch": 1.019160211985324, "grad_norm": 2.0284624099731445, "learning_rate": 7.862857142857143e-06, "loss": 0.0741, "step": 2500 }, { "epoch": 1.0293518141051774, "grad_norm": 1.9724788665771484, "learning_rate": 7.827142857142858e-06, "loss": 0.068, "step": 2525 }, { "epoch": 1.0395434162250305, "grad_norm": 2.1046948432922363, "learning_rate": 7.791428571428572e-06, "loss": 0.0785, "step": 2550 }, { "epoch": 1.049735018344884, "grad_norm": 2.5051779747009277, "learning_rate": 7.755714285714287e-06, "loss": 0.0788, "step": 2575 }, { "epoch": 1.059926620464737, "grad_norm": 2.253950357437134, "learning_rate": 7.72e-06, "loss": 0.0748, "step": 2600 }, { "epoch": 1.0701182225845902, "grad_norm": 2.03273868560791, "learning_rate": 7.684285714285716e-06, "loss": 0.0795, "step": 2625 }, { "epoch": 1.0803098247044436, "grad_norm": 1.9357993602752686, "learning_rate": 7.64857142857143e-06, "loss": 0.0688, "step": 2650 }, { "epoch": 1.0905014268242967, "grad_norm": 2.5450971126556396, "learning_rate": 7.612857142857143e-06, "loss": 0.0807, "step": 2675 }, { "epoch": 1.10069302894415, "grad_norm": 2.2401187419891357, "learning_rate": 7.577142857142857e-06, "loss": 0.0741, "step": 2700 }, { "epoch": 1.1108846310640033, "grad_norm": 2.1422953605651855, "learning_rate": 7.5414285714285715e-06, "loss": 0.0761, "step": 2725 }, { "epoch": 1.1210762331838564, "grad_norm": 2.204946994781494, "learning_rate": 7.505714285714286e-06, "loss": 0.0726, "step": 2750 }, { "epoch": 1.1312678353037098, "grad_norm": 2.2459845542907715, "learning_rate": 7.4700000000000005e-06, "loss": 0.0751, "step": 2775 }, { "epoch": 1.141459437423563, "grad_norm": 2.2025928497314453, "learning_rate": 7.434285714285715e-06, "loss": 0.074, "step": 2800 }, { "epoch": 1.1516510395434163, "grad_norm": 1.94180166721344, "learning_rate": 7.3985714285714295e-06, "loss": 0.0768, "step": 2825 }, { "epoch": 1.1618426416632694, "grad_norm": 2.4948198795318604, "learning_rate": 7.362857142857144e-06, "loss": 0.0737, "step": 2850 }, { "epoch": 1.1720342437831226, "grad_norm": 2.136427640914917, "learning_rate": 7.3271428571428585e-06, "loss": 0.0711, "step": 2875 }, { "epoch": 1.182225845902976, "grad_norm": 1.7289948463439941, "learning_rate": 7.291428571428571e-06, "loss": 0.0713, "step": 2900 }, { "epoch": 1.1924174480228291, "grad_norm": 2.254673957824707, "learning_rate": 7.255714285714286e-06, "loss": 0.0696, "step": 2925 }, { "epoch": 1.2026090501426825, "grad_norm": 2.0539391040802, "learning_rate": 7.22e-06, "loss": 0.0669, "step": 2950 }, { "epoch": 1.2128006522625356, "grad_norm": 2.1277451515197754, "learning_rate": 7.184285714285715e-06, "loss": 0.0682, "step": 2975 }, { "epoch": 1.222992254382389, "grad_norm": 1.7704741954803467, "learning_rate": 7.148571428571429e-06, "loss": 0.0655, "step": 3000 }, { "epoch": 1.222992254382389, "eval_loss": 0.07764188200235367, "eval_runtime": 2406.4856, "eval_samples_per_second": 4.237, "eval_steps_per_second": 0.133, "eval_wer": 10.566755631077823, "step": 3000 }, { "epoch": 1.2331838565022422, "grad_norm": 2.0602128505706787, "learning_rate": 7.112857142857144e-06, "loss": 0.0667, "step": 3025 }, { "epoch": 1.2433754586220953, "grad_norm": 2.0332977771759033, "learning_rate": 7.077142857142858e-06, "loss": 0.0656, "step": 3050 }, { "epoch": 1.2535670607419487, "grad_norm": 2.4050822257995605, "learning_rate": 7.041428571428572e-06, "loss": 0.0624, "step": 3075 }, { "epoch": 1.2637586628618018, "grad_norm": 2.1430561542510986, "learning_rate": 7.0057142857142865e-06, "loss": 0.0805, "step": 3100 }, { "epoch": 1.273950264981655, "grad_norm": 2.0134341716766357, "learning_rate": 6.97e-06, "loss": 0.0664, "step": 3125 }, { "epoch": 1.2841418671015083, "grad_norm": 2.217721462249756, "learning_rate": 6.934285714285715e-06, "loss": 0.0745, "step": 3150 }, { "epoch": 1.2943334692213617, "grad_norm": 1.5617090463638306, "learning_rate": 6.898571428571429e-06, "loss": 0.0648, "step": 3175 }, { "epoch": 1.3045250713412149, "grad_norm": 1.9269059896469116, "learning_rate": 6.862857142857144e-06, "loss": 0.0618, "step": 3200 }, { "epoch": 1.314716673461068, "grad_norm": 2.0952699184417725, "learning_rate": 6.827142857142857e-06, "loss": 0.0706, "step": 3225 }, { "epoch": 1.3249082755809214, "grad_norm": 1.764196753501892, "learning_rate": 6.791428571428572e-06, "loss": 0.0658, "step": 3250 }, { "epoch": 1.3350998777007745, "grad_norm": 2.4948887825012207, "learning_rate": 6.755714285714286e-06, "loss": 0.0573, "step": 3275 }, { "epoch": 1.3452914798206277, "grad_norm": 2.2995638847351074, "learning_rate": 6.720000000000001e-06, "loss": 0.0584, "step": 3300 }, { "epoch": 1.355483081940481, "grad_norm": 1.6779873371124268, "learning_rate": 6.684285714285715e-06, "loss": 0.0618, "step": 3325 }, { "epoch": 1.3656746840603342, "grad_norm": 2.435180425643921, "learning_rate": 6.648571428571429e-06, "loss": 0.0611, "step": 3350 }, { "epoch": 1.3758662861801876, "grad_norm": 1.7600586414337158, "learning_rate": 6.612857142857143e-06, "loss": 0.0638, "step": 3375 }, { "epoch": 1.3860578883000407, "grad_norm": 2.179009199142456, "learning_rate": 6.577142857142857e-06, "loss": 0.0577, "step": 3400 }, { "epoch": 1.396249490419894, "grad_norm": 1.8130829334259033, "learning_rate": 6.541428571428572e-06, "loss": 0.0572, "step": 3425 }, { "epoch": 1.4064410925397473, "grad_norm": 2.2357475757598877, "learning_rate": 6.505714285714286e-06, "loss": 0.0617, "step": 3450 }, { "epoch": 1.4166326946596004, "grad_norm": 2.523681163787842, "learning_rate": 6.470000000000001e-06, "loss": 0.0592, "step": 3475 }, { "epoch": 1.4268242967794538, "grad_norm": 1.9920995235443115, "learning_rate": 6.434285714285715e-06, "loss": 0.0594, "step": 3500 }, { "epoch": 1.437015898899307, "grad_norm": 1.876678705215454, "learning_rate": 6.39857142857143e-06, "loss": 0.0588, "step": 3525 }, { "epoch": 1.4472075010191603, "grad_norm": 2.1072115898132324, "learning_rate": 6.3628571428571426e-06, "loss": 0.0584, "step": 3550 }, { "epoch": 1.4573991031390134, "grad_norm": 1.9492688179016113, "learning_rate": 6.327142857142857e-06, "loss": 0.0619, "step": 3575 }, { "epoch": 1.4675907052588668, "grad_norm": 1.9974353313446045, "learning_rate": 6.2914285714285716e-06, "loss": 0.0526, "step": 3600 }, { "epoch": 1.47778230737872, "grad_norm": 2.0630576610565186, "learning_rate": 6.255714285714286e-06, "loss": 0.0587, "step": 3625 }, { "epoch": 1.487973909498573, "grad_norm": 2.312988042831421, "learning_rate": 6.220000000000001e-06, "loss": 0.0539, "step": 3650 }, { "epoch": 1.4981655116184265, "grad_norm": 2.3259403705596924, "learning_rate": 6.184285714285715e-06, "loss": 0.0547, "step": 3675 }, { "epoch": 1.5083571137382796, "grad_norm": 1.7665891647338867, "learning_rate": 6.14857142857143e-06, "loss": 0.0546, "step": 3700 }, { "epoch": 1.5185487158581328, "grad_norm": 1.8702497482299805, "learning_rate": 6.112857142857144e-06, "loss": 0.059, "step": 3725 }, { "epoch": 1.5287403179779862, "grad_norm": 1.725127935409546, "learning_rate": 6.077142857142858e-06, "loss": 0.0593, "step": 3750 }, { "epoch": 1.5389319200978395, "grad_norm": 1.6888962984085083, "learning_rate": 6.0414285714285714e-06, "loss": 0.0492, "step": 3775 }, { "epoch": 1.5491235222176927, "grad_norm": 1.8041000366210938, "learning_rate": 6.005714285714286e-06, "loss": 0.0524, "step": 3800 }, { "epoch": 1.5593151243375458, "grad_norm": 2.0858354568481445, "learning_rate": 5.9700000000000004e-06, "loss": 0.0532, "step": 3825 }, { "epoch": 1.5695067264573992, "grad_norm": 2.3499724864959717, "learning_rate": 5.934285714285715e-06, "loss": 0.0509, "step": 3850 }, { "epoch": 1.5796983285772523, "grad_norm": 1.6974126100540161, "learning_rate": 5.8985714285714295e-06, "loss": 0.0525, "step": 3875 }, { "epoch": 1.5898899306971055, "grad_norm": 2.386164426803589, "learning_rate": 5.862857142857143e-06, "loss": 0.056, "step": 3900 }, { "epoch": 1.6000815328169589, "grad_norm": 2.2181949615478516, "learning_rate": 5.827142857142858e-06, "loss": 0.0532, "step": 3925 }, { "epoch": 1.610273134936812, "grad_norm": 2.883762836456299, "learning_rate": 5.791428571428572e-06, "loss": 0.0554, "step": 3950 }, { "epoch": 1.6204647370566652, "grad_norm": 2.2483813762664795, "learning_rate": 5.755714285714287e-06, "loss": 0.053, "step": 3975 }, { "epoch": 1.6306563391765185, "grad_norm": 1.992173433303833, "learning_rate": 5.72e-06, "loss": 0.0455, "step": 4000 }, { "epoch": 1.6306563391765185, "eval_loss": 0.05135625973343849, "eval_runtime": 2404.8469, "eval_samples_per_second": 4.24, "eval_steps_per_second": 0.133, "eval_wer": 6.767451954600445, "step": 4000 }, { "epoch": 1.640847941296372, "grad_norm": 1.7579491138458252, "learning_rate": 5.684285714285715e-06, "loss": 0.0521, "step": 4025 }, { "epoch": 1.651039543416225, "grad_norm": 1.9420897960662842, "learning_rate": 5.6485714285714285e-06, "loss": 0.0542, "step": 4050 }, { "epoch": 1.6612311455360782, "grad_norm": 2.2500171661376953, "learning_rate": 5.612857142857143e-06, "loss": 0.0516, "step": 4075 }, { "epoch": 1.6714227476559316, "grad_norm": 1.7925150394439697, "learning_rate": 5.5771428571428575e-06, "loss": 0.0493, "step": 4100 }, { "epoch": 1.6816143497757847, "grad_norm": 2.5120983123779297, "learning_rate": 5.541428571428572e-06, "loss": 0.0496, "step": 4125 }, { "epoch": 1.6918059518956379, "grad_norm": 1.6827411651611328, "learning_rate": 5.5057142857142865e-06, "loss": 0.048, "step": 4150 }, { "epoch": 1.7019975540154912, "grad_norm": 1.951802134513855, "learning_rate": 5.470000000000001e-06, "loss": 0.0491, "step": 4175 }, { "epoch": 1.7121891561353446, "grad_norm": 1.688637375831604, "learning_rate": 5.4342857142857155e-06, "loss": 0.0515, "step": 4200 }, { "epoch": 1.7223807582551978, "grad_norm": 2.034071207046509, "learning_rate": 5.398571428571428e-06, "loss": 0.0516, "step": 4225 }, { "epoch": 1.732572360375051, "grad_norm": 1.9581401348114014, "learning_rate": 5.362857142857143e-06, "loss": 0.0536, "step": 4250 }, { "epoch": 1.7427639624949043, "grad_norm": 1.8898799419403076, "learning_rate": 5.327142857142857e-06, "loss": 0.0491, "step": 4275 }, { "epoch": 1.7529555646147574, "grad_norm": 2.1761457920074463, "learning_rate": 5.291428571428572e-06, "loss": 0.0496, "step": 4300 }, { "epoch": 1.7631471667346106, "grad_norm": 1.9841150045394897, "learning_rate": 5.255714285714286e-06, "loss": 0.0469, "step": 4325 }, { "epoch": 1.773338768854464, "grad_norm": 1.3026888370513916, "learning_rate": 5.220000000000001e-06, "loss": 0.0482, "step": 4350 }, { "epoch": 1.7835303709743173, "grad_norm": 1.3047564029693604, "learning_rate": 5.184285714285715e-06, "loss": 0.0493, "step": 4375 }, { "epoch": 1.7937219730941703, "grad_norm": 1.6902995109558105, "learning_rate": 5.14857142857143e-06, "loss": 0.0494, "step": 4400 }, { "epoch": 1.8039135752140236, "grad_norm": 2.262953758239746, "learning_rate": 5.112857142857143e-06, "loss": 0.0476, "step": 4425 }, { "epoch": 1.814105177333877, "grad_norm": 1.777170181274414, "learning_rate": 5.077142857142857e-06, "loss": 0.0471, "step": 4450 }, { "epoch": 1.8242967794537301, "grad_norm": 1.9193710088729858, "learning_rate": 5.041428571428572e-06, "loss": 0.0467, "step": 4475 }, { "epoch": 1.8344883815735833, "grad_norm": 1.8790152072906494, "learning_rate": 5.005714285714286e-06, "loss": 0.0487, "step": 4500 }, { "epoch": 1.8446799836934367, "grad_norm": 2.3190362453460693, "learning_rate": 4.970000000000001e-06, "loss": 0.0466, "step": 4525 }, { "epoch": 1.8548715858132898, "grad_norm": 1.821876049041748, "learning_rate": 4.934285714285715e-06, "loss": 0.0456, "step": 4550 }, { "epoch": 1.865063187933143, "grad_norm": 1.3371105194091797, "learning_rate": 4.898571428571429e-06, "loss": 0.0414, "step": 4575 }, { "epoch": 1.8752547900529963, "grad_norm": 1.5593416690826416, "learning_rate": 4.862857142857143e-06, "loss": 0.0435, "step": 4600 }, { "epoch": 1.8854463921728497, "grad_norm": 1.9036990404129028, "learning_rate": 4.827142857142858e-06, "loss": 0.0448, "step": 4625 }, { "epoch": 1.8956379942927029, "grad_norm": 1.8190467357635498, "learning_rate": 4.7914285714285715e-06, "loss": 0.0388, "step": 4650 }, { "epoch": 1.905829596412556, "grad_norm": 1.7529155015945435, "learning_rate": 4.755714285714286e-06, "loss": 0.0421, "step": 4675 }, { "epoch": 1.9160211985324094, "grad_norm": 2.084097385406494, "learning_rate": 4.7200000000000005e-06, "loss": 0.0464, "step": 4700 }, { "epoch": 1.9262128006522625, "grad_norm": 1.569948673248291, "learning_rate": 4.684285714285714e-06, "loss": 0.0438, "step": 4725 }, { "epoch": 1.9364044027721157, "grad_norm": 2.0869407653808594, "learning_rate": 4.648571428571429e-06, "loss": 0.0421, "step": 4750 }, { "epoch": 1.946596004891969, "grad_norm": 1.8481653928756714, "learning_rate": 4.612857142857143e-06, "loss": 0.0429, "step": 4775 }, { "epoch": 1.9567876070118224, "grad_norm": 2.0351133346557617, "learning_rate": 4.577142857142858e-06, "loss": 0.0431, "step": 4800 }, { "epoch": 1.9669792091316753, "grad_norm": 1.88765549659729, "learning_rate": 4.541428571428571e-06, "loss": 0.0448, "step": 4825 }, { "epoch": 1.9771708112515287, "grad_norm": 1.8413023948669434, "learning_rate": 4.505714285714286e-06, "loss": 0.0391, "step": 4850 }, { "epoch": 1.987362413371382, "grad_norm": 2.0287082195281982, "learning_rate": 4.47e-06, "loss": 0.0417, "step": 4875 }, { "epoch": 1.9975540154912352, "grad_norm": 2.1331636905670166, "learning_rate": 4.434285714285715e-06, "loss": 0.0434, "step": 4900 }, { "epoch": 2.0077456176110884, "grad_norm": 1.133825421333313, "learning_rate": 4.3985714285714286e-06, "loss": 0.0246, "step": 4925 }, { "epoch": 2.0179372197309418, "grad_norm": 1.3025981187820435, "learning_rate": 4.362857142857143e-06, "loss": 0.017, "step": 4950 }, { "epoch": 2.028128821850795, "grad_norm": 1.008670687675476, "learning_rate": 4.327142857142858e-06, "loss": 0.0174, "step": 4975 }, { "epoch": 2.038320423970648, "grad_norm": 0.9607629179954529, "learning_rate": 4.291428571428572e-06, "loss": 0.0162, "step": 5000 }, { "epoch": 2.038320423970648, "eval_loss": 0.0353175513446331, "eval_runtime": 2408.5196, "eval_samples_per_second": 4.233, "eval_steps_per_second": 0.132, "eval_wer": 4.477157079135229, "step": 5000 }, { "epoch": 2.0485120260905014, "grad_norm": 1.6613744497299194, "learning_rate": 4.255714285714286e-06, "loss": 0.0213, "step": 5025 }, { "epoch": 2.058703628210355, "grad_norm": 0.6007469296455383, "learning_rate": 4.22e-06, "loss": 0.015, "step": 5050 }, { "epoch": 2.0688952303302077, "grad_norm": 1.5992757081985474, "learning_rate": 4.184285714285715e-06, "loss": 0.0191, "step": 5075 }, { "epoch": 2.079086832450061, "grad_norm": 0.913711667060852, "learning_rate": 4.148571428571429e-06, "loss": 0.0155, "step": 5100 }, { "epoch": 2.0892784345699145, "grad_norm": 1.1569514274597168, "learning_rate": 4.112857142857144e-06, "loss": 0.0164, "step": 5125 }, { "epoch": 2.099470036689768, "grad_norm": 1.0935266017913818, "learning_rate": 4.0771428571428574e-06, "loss": 0.0146, "step": 5150 }, { "epoch": 2.1096616388096208, "grad_norm": 0.8869621157646179, "learning_rate": 4.041428571428572e-06, "loss": 0.0168, "step": 5175 }, { "epoch": 2.119853240929474, "grad_norm": 1.3779696226119995, "learning_rate": 4.0057142857142864e-06, "loss": 0.0164, "step": 5200 }, { "epoch": 2.1300448430493275, "grad_norm": 1.2132755517959595, "learning_rate": 3.97e-06, "loss": 0.0178, "step": 5225 }, { "epoch": 2.1402364451691804, "grad_norm": 1.8169926404953003, "learning_rate": 3.934285714285715e-06, "loss": 0.0181, "step": 5250 }, { "epoch": 2.150428047289034, "grad_norm": 0.7188290357589722, "learning_rate": 3.898571428571429e-06, "loss": 0.0172, "step": 5275 }, { "epoch": 2.160619649408887, "grad_norm": 1.0363893508911133, "learning_rate": 3.862857142857143e-06, "loss": 0.0151, "step": 5300 }, { "epoch": 2.1708112515287405, "grad_norm": 0.7467776536941528, "learning_rate": 3.827142857142857e-06, "loss": 0.0159, "step": 5325 }, { "epoch": 2.1810028536485935, "grad_norm": 1.4140534400939941, "learning_rate": 3.7914285714285722e-06, "loss": 0.0179, "step": 5350 }, { "epoch": 2.191194455768447, "grad_norm": 0.831071138381958, "learning_rate": 3.755714285714286e-06, "loss": 0.018, "step": 5375 }, { "epoch": 2.2013860578883, "grad_norm": 0.978151261806488, "learning_rate": 3.7200000000000004e-06, "loss": 0.0164, "step": 5400 }, { "epoch": 2.211577660008153, "grad_norm": 1.2061834335327148, "learning_rate": 3.684285714285715e-06, "loss": 0.0155, "step": 5425 }, { "epoch": 2.2217692621280065, "grad_norm": 1.0183775424957275, "learning_rate": 3.648571428571429e-06, "loss": 0.0166, "step": 5450 }, { "epoch": 2.23196086424786, "grad_norm": 0.7747207283973694, "learning_rate": 3.612857142857143e-06, "loss": 0.017, "step": 5475 }, { "epoch": 2.242152466367713, "grad_norm": 1.3631898164749146, "learning_rate": 3.5771428571428576e-06, "loss": 0.0147, "step": 5500 }, { "epoch": 2.252344068487566, "grad_norm": 2.3850202560424805, "learning_rate": 3.5414285714285716e-06, "loss": 0.0145, "step": 5525 }, { "epoch": 2.2625356706074196, "grad_norm": 1.2588169574737549, "learning_rate": 3.505714285714286e-06, "loss": 0.0191, "step": 5550 }, { "epoch": 2.2727272727272725, "grad_norm": 1.3466730117797852, "learning_rate": 3.4700000000000002e-06, "loss": 0.0177, "step": 5575 }, { "epoch": 2.282918874847126, "grad_norm": 1.3090860843658447, "learning_rate": 3.4342857142857143e-06, "loss": 0.014, "step": 5600 }, { "epoch": 2.2931104769669792, "grad_norm": 1.4974130392074585, "learning_rate": 3.398571428571429e-06, "loss": 0.0152, "step": 5625 }, { "epoch": 2.3033020790868326, "grad_norm": 1.342842936515808, "learning_rate": 3.3628571428571433e-06, "loss": 0.0165, "step": 5650 }, { "epoch": 2.3134936812066855, "grad_norm": 0.9690020084381104, "learning_rate": 3.327142857142858e-06, "loss": 0.0146, "step": 5675 }, { "epoch": 2.323685283326539, "grad_norm": 0.4699741303920746, "learning_rate": 3.2914285714285715e-06, "loss": 0.0178, "step": 5700 }, { "epoch": 2.3338768854463923, "grad_norm": 1.166608214378357, "learning_rate": 3.255714285714286e-06, "loss": 0.0152, "step": 5725 }, { "epoch": 2.344068487566245, "grad_norm": 0.7220166921615601, "learning_rate": 3.2200000000000005e-06, "loss": 0.0142, "step": 5750 }, { "epoch": 2.3542600896860986, "grad_norm": 0.8474377989768982, "learning_rate": 3.1842857142857146e-06, "loss": 0.0186, "step": 5775 }, { "epoch": 2.364451691805952, "grad_norm": 1.246468186378479, "learning_rate": 3.1485714285714287e-06, "loss": 0.0143, "step": 5800 }, { "epoch": 2.3746432939258053, "grad_norm": 0.9267088770866394, "learning_rate": 3.112857142857143e-06, "loss": 0.0157, "step": 5825 }, { "epoch": 2.3848348960456582, "grad_norm": 1.0727224349975586, "learning_rate": 3.0771428571428573e-06, "loss": 0.0145, "step": 5850 }, { "epoch": 2.3950264981655116, "grad_norm": 1.3142344951629639, "learning_rate": 3.0414285714285718e-06, "loss": 0.0133, "step": 5875 }, { "epoch": 2.405218100285365, "grad_norm": 1.3349863290786743, "learning_rate": 3.005714285714286e-06, "loss": 0.0137, "step": 5900 }, { "epoch": 2.415409702405218, "grad_norm": 0.7742011547088623, "learning_rate": 2.97e-06, "loss": 0.0149, "step": 5925 }, { "epoch": 2.4256013045250713, "grad_norm": 1.1534104347229004, "learning_rate": 2.9342857142857144e-06, "loss": 0.0156, "step": 5950 }, { "epoch": 2.4357929066449246, "grad_norm": 1.1034826040267944, "learning_rate": 2.898571428571429e-06, "loss": 0.0136, "step": 5975 }, { "epoch": 2.445984508764778, "grad_norm": 1.3665175437927246, "learning_rate": 2.8628571428571435e-06, "loss": 0.0129, "step": 6000 }, { "epoch": 2.445984508764778, "eval_loss": 0.027366982772946358, "eval_runtime": 2410.6864, "eval_samples_per_second": 4.23, "eval_steps_per_second": 0.132, "eval_wer": 3.436365819195996, "step": 6000 }, { "epoch": 2.456176110884631, "grad_norm": 0.9552819728851318, "learning_rate": 2.827142857142857e-06, "loss": 0.015, "step": 6025 }, { "epoch": 2.4663677130044843, "grad_norm": 0.8750997185707092, "learning_rate": 2.7914285714285716e-06, "loss": 0.014, "step": 6050 }, { "epoch": 2.4765593151243377, "grad_norm": 1.3504140377044678, "learning_rate": 2.755714285714286e-06, "loss": 0.0172, "step": 6075 }, { "epoch": 2.4867509172441906, "grad_norm": 1.59817373752594, "learning_rate": 2.7200000000000002e-06, "loss": 0.0153, "step": 6100 }, { "epoch": 2.496942519364044, "grad_norm": 1.6574933528900146, "learning_rate": 2.6842857142857143e-06, "loss": 0.0167, "step": 6125 }, { "epoch": 2.5071341214838974, "grad_norm": 0.7508680820465088, "learning_rate": 2.648571428571429e-06, "loss": 0.013, "step": 6150 }, { "epoch": 2.5173257236037507, "grad_norm": 1.0604500770568848, "learning_rate": 2.612857142857143e-06, "loss": 0.0162, "step": 6175 }, { "epoch": 2.5275173257236037, "grad_norm": 1.3438018560409546, "learning_rate": 2.5771428571428574e-06, "loss": 0.0141, "step": 6200 }, { "epoch": 2.537708927843457, "grad_norm": 1.3562772274017334, "learning_rate": 2.541428571428572e-06, "loss": 0.0134, "step": 6225 }, { "epoch": 2.54790052996331, "grad_norm": 0.681122362613678, "learning_rate": 2.5057142857142856e-06, "loss": 0.0123, "step": 6250 }, { "epoch": 2.5580921320831633, "grad_norm": 0.8060537576675415, "learning_rate": 2.47e-06, "loss": 0.0143, "step": 6275 }, { "epoch": 2.5682837342030167, "grad_norm": 0.9794915318489075, "learning_rate": 2.4342857142857146e-06, "loss": 0.0128, "step": 6300 }, { "epoch": 2.57847533632287, "grad_norm": 0.7749600410461426, "learning_rate": 2.3985714285714287e-06, "loss": 0.0132, "step": 6325 }, { "epoch": 2.5886669384427234, "grad_norm": 0.808312177658081, "learning_rate": 2.362857142857143e-06, "loss": 0.0166, "step": 6350 }, { "epoch": 2.5988585405625764, "grad_norm": 1.0200546979904175, "learning_rate": 2.3271428571428572e-06, "loss": 0.0138, "step": 6375 }, { "epoch": 2.6090501426824297, "grad_norm": 1.4276586771011353, "learning_rate": 2.2914285714285718e-06, "loss": 0.0119, "step": 6400 }, { "epoch": 2.6192417448022827, "grad_norm": 1.1479567289352417, "learning_rate": 2.255714285714286e-06, "loss": 0.0127, "step": 6425 }, { "epoch": 2.629433346922136, "grad_norm": 0.9421886205673218, "learning_rate": 2.2200000000000003e-06, "loss": 0.0132, "step": 6450 }, { "epoch": 2.6396249490419894, "grad_norm": 0.573397696018219, "learning_rate": 2.1842857142857144e-06, "loss": 0.0131, "step": 6475 }, { "epoch": 2.649816551161843, "grad_norm": 1.2187764644622803, "learning_rate": 2.148571428571429e-06, "loss": 0.0125, "step": 6500 }, { "epoch": 2.660008153281696, "grad_norm": 0.7359505891799927, "learning_rate": 2.112857142857143e-06, "loss": 0.0141, "step": 6525 }, { "epoch": 2.670199755401549, "grad_norm": 1.8559486865997314, "learning_rate": 2.077142857142857e-06, "loss": 0.0142, "step": 6550 }, { "epoch": 2.6803913575214025, "grad_norm": 1.0747308731079102, "learning_rate": 2.0414285714285716e-06, "loss": 0.0105, "step": 6575 }, { "epoch": 2.6905829596412554, "grad_norm": 0.8565563559532166, "learning_rate": 2.0057142857142857e-06, "loss": 0.0144, "step": 6600 }, { "epoch": 2.7007745617611087, "grad_norm": 1.1864107847213745, "learning_rate": 1.97e-06, "loss": 0.013, "step": 6625 }, { "epoch": 2.710966163880962, "grad_norm": 0.6643468141555786, "learning_rate": 1.9342857142857143e-06, "loss": 0.0149, "step": 6650 }, { "epoch": 2.7211577660008155, "grad_norm": 0.6889024376869202, "learning_rate": 1.8985714285714288e-06, "loss": 0.0115, "step": 6675 }, { "epoch": 2.7313493681206684, "grad_norm": 1.5234169960021973, "learning_rate": 1.8628571428571429e-06, "loss": 0.0139, "step": 6700 }, { "epoch": 2.741540970240522, "grad_norm": 1.095461368560791, "learning_rate": 1.8271428571428574e-06, "loss": 0.0132, "step": 6725 }, { "epoch": 2.751732572360375, "grad_norm": 1.7404385805130005, "learning_rate": 1.7914285714285715e-06, "loss": 0.0135, "step": 6750 }, { "epoch": 2.761924174480228, "grad_norm": 0.8274975419044495, "learning_rate": 1.755714285714286e-06, "loss": 0.013, "step": 6775 }, { "epoch": 2.7721157766000815, "grad_norm": 0.8858354091644287, "learning_rate": 1.72e-06, "loss": 0.0118, "step": 6800 }, { "epoch": 2.782307378719935, "grad_norm": 0.5157521963119507, "learning_rate": 1.6842857142857143e-06, "loss": 0.0107, "step": 6825 }, { "epoch": 2.792498980839788, "grad_norm": 1.2508779764175415, "learning_rate": 1.6485714285714289e-06, "loss": 0.015, "step": 6850 }, { "epoch": 2.802690582959641, "grad_norm": 1.2929726839065552, "learning_rate": 1.612857142857143e-06, "loss": 0.0125, "step": 6875 }, { "epoch": 2.8128821850794945, "grad_norm": 0.8893064856529236, "learning_rate": 1.5771428571428574e-06, "loss": 0.0122, "step": 6900 }, { "epoch": 2.823073787199348, "grad_norm": 1.2606889009475708, "learning_rate": 1.5414285714285715e-06, "loss": 0.0137, "step": 6925 }, { "epoch": 2.833265389319201, "grad_norm": 1.410904884338379, "learning_rate": 1.5057142857142858e-06, "loss": 0.0121, "step": 6950 }, { "epoch": 2.843456991439054, "grad_norm": 0.792271077632904, "learning_rate": 1.4700000000000001e-06, "loss": 0.0135, "step": 6975 }, { "epoch": 2.8536485935589075, "grad_norm": 1.0926941633224487, "learning_rate": 1.4342857142857144e-06, "loss": 0.0117, "step": 7000 }, { "epoch": 2.8536485935589075, "eval_loss": 0.022013096138834953, "eval_runtime": 2391.195, "eval_samples_per_second": 4.264, "eval_steps_per_second": 0.133, "eval_wer": 2.5110128090281947, "step": 7000 }, { "epoch": 2.863840195678761, "grad_norm": 1.1174569129943848, "learning_rate": 1.3985714285714285e-06, "loss": 0.0123, "step": 7025 }, { "epoch": 2.874031797798614, "grad_norm": 1.3422982692718506, "learning_rate": 1.362857142857143e-06, "loss": 0.0155, "step": 7050 }, { "epoch": 2.884223399918467, "grad_norm": 1.1771271228790283, "learning_rate": 1.327142857142857e-06, "loss": 0.014, "step": 7075 }, { "epoch": 2.8944150020383206, "grad_norm": 0.8138777017593384, "learning_rate": 1.2914285714285716e-06, "loss": 0.0159, "step": 7100 }, { "epoch": 2.9046066041581735, "grad_norm": 0.964419960975647, "learning_rate": 1.2557142857142859e-06, "loss": 0.0119, "step": 7125 }, { "epoch": 2.914798206278027, "grad_norm": 1.0425853729248047, "learning_rate": 1.2200000000000002e-06, "loss": 0.0144, "step": 7150 }, { "epoch": 2.9249898083978803, "grad_norm": 1.2048903703689575, "learning_rate": 1.1842857142857143e-06, "loss": 0.0112, "step": 7175 }, { "epoch": 2.9351814105177336, "grad_norm": 0.9228710532188416, "learning_rate": 1.1485714285714286e-06, "loss": 0.0121, "step": 7200 }, { "epoch": 2.9453730126375866, "grad_norm": 0.35819801688194275, "learning_rate": 1.1128571428571429e-06, "loss": 0.0103, "step": 7225 }, { "epoch": 2.95556461475744, "grad_norm": 0.8136561512947083, "learning_rate": 1.0771428571428574e-06, "loss": 0.0129, "step": 7250 }, { "epoch": 2.965756216877293, "grad_norm": 1.0411508083343506, "learning_rate": 1.0414285714285717e-06, "loss": 0.0104, "step": 7275 }, { "epoch": 2.975947818997146, "grad_norm": 0.7591568231582642, "learning_rate": 1.0057142857142857e-06, "loss": 0.0104, "step": 7300 }, { "epoch": 2.9861394211169996, "grad_norm": 0.9218117594718933, "learning_rate": 9.7e-07, "loss": 0.0123, "step": 7325 }, { "epoch": 2.996331023236853, "grad_norm": 1.3467174768447876, "learning_rate": 9.342857142857144e-07, "loss": 0.01, "step": 7350 }, { "epoch": 3.006522625356706, "grad_norm": 0.14992065727710724, "learning_rate": 8.985714285714286e-07, "loss": 0.0068, "step": 7375 }, { "epoch": 3.0167142274765593, "grad_norm": 0.44808146357536316, "learning_rate": 8.628571428571429e-07, "loss": 0.0044, "step": 7400 }, { "epoch": 3.0269058295964126, "grad_norm": 0.4159485101699829, "learning_rate": 8.271428571428572e-07, "loss": 0.0045, "step": 7425 }, { "epoch": 3.037097431716266, "grad_norm": 0.5730934143066406, "learning_rate": 7.914285714285715e-07, "loss": 0.0041, "step": 7450 }, { "epoch": 3.047289033836119, "grad_norm": 0.2851293683052063, "learning_rate": 7.557142857142857e-07, "loss": 0.0048, "step": 7475 }, { "epoch": 3.0574806359559723, "grad_norm": 0.3623310625553131, "learning_rate": 7.2e-07, "loss": 0.0054, "step": 7500 }, { "epoch": 3.0676722380758257, "grad_norm": 0.1989881545305252, "learning_rate": 6.842857142857143e-07, "loss": 0.004, "step": 7525 }, { "epoch": 3.0778638401956786, "grad_norm": 0.24523906409740448, "learning_rate": 6.485714285714287e-07, "loss": 0.0052, "step": 7550 }, { "epoch": 3.088055442315532, "grad_norm": 0.09773947298526764, "learning_rate": 6.128571428571429e-07, "loss": 0.0037, "step": 7575 }, { "epoch": 3.0982470444353853, "grad_norm": 0.8551767468452454, "learning_rate": 5.771428571428572e-07, "loss": 0.0041, "step": 7600 }, { "epoch": 3.1084386465552383, "grad_norm": 0.32709449529647827, "learning_rate": 5.414285714285715e-07, "loss": 0.0046, "step": 7625 }, { "epoch": 3.1186302486750916, "grad_norm": 0.22505348920822144, "learning_rate": 5.057142857142858e-07, "loss": 0.0039, "step": 7650 }, { "epoch": 3.128821850794945, "grad_norm": 0.6920948028564453, "learning_rate": 4.7000000000000005e-07, "loss": 0.005, "step": 7675 }, { "epoch": 3.1390134529147984, "grad_norm": 0.37104272842407227, "learning_rate": 4.342857142857143e-07, "loss": 0.0041, "step": 7700 }, { "epoch": 3.1492050550346513, "grad_norm": 0.5752152800559998, "learning_rate": 3.985714285714286e-07, "loss": 0.0046, "step": 7725 }, { "epoch": 3.1593966571545047, "grad_norm": 0.40613701939582825, "learning_rate": 3.6285714285714283e-07, "loss": 0.0041, "step": 7750 }, { "epoch": 3.169588259274358, "grad_norm": 0.4888840317726135, "learning_rate": 3.271428571428572e-07, "loss": 0.0047, "step": 7775 }, { "epoch": 3.179779861394211, "grad_norm": 0.22719787061214447, "learning_rate": 2.914285714285715e-07, "loss": 0.0039, "step": 7800 }, { "epoch": 3.1899714635140644, "grad_norm": 0.3767295479774475, "learning_rate": 2.557142857142857e-07, "loss": 0.0047, "step": 7825 }, { "epoch": 3.2001630656339177, "grad_norm": 0.6686317324638367, "learning_rate": 2.2e-07, "loss": 0.0041, "step": 7850 }, { "epoch": 3.210354667753771, "grad_norm": 0.40455150604248047, "learning_rate": 1.842857142857143e-07, "loss": 0.0058, "step": 7875 }, { "epoch": 3.220546269873624, "grad_norm": 0.2686769962310791, "learning_rate": 1.4857142857142857e-07, "loss": 0.0046, "step": 7900 }, { "epoch": 3.2307378719934774, "grad_norm": 0.17891818284988403, "learning_rate": 1.1285714285714287e-07, "loss": 0.0038, "step": 7925 }, { "epoch": 3.2409294741133308, "grad_norm": 0.32351842522621155, "learning_rate": 7.714285714285715e-08, "loss": 0.0036, "step": 7950 }, { "epoch": 3.2511210762331837, "grad_norm": 0.2572202682495117, "learning_rate": 4.1428571428571426e-08, "loss": 0.0036, "step": 7975 }, { "epoch": 3.261312678353037, "grad_norm": 0.1762164831161499, "learning_rate": 5.714285714285715e-09, "loss": 0.0044, "step": 8000 }, { "epoch": 3.261312678353037, "eval_loss": 0.02037939429283142, "eval_runtime": 2392.6668, "eval_samples_per_second": 4.261, "eval_steps_per_second": 0.133, "eval_wer": 2.2782892974889872, "step": 8000 } ], "logging_steps": 25, "max_steps": 8000, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.6124729999261696e+20, "train_batch_size": 32, "trial_name": null, "trial_params": null }