{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.964785335262904, "eval_steps": 500, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "ETA": 0.0, "epoch": 0.00032159511175430133, "fp16_scale": 1.0, "global_step": 1, "grad_norm": 10.271092308212868, "learning_rate": 2.127659574468085e-08, "loss": 0.675, "step": 1 }, { "ETA": 7.02, "epoch": 0.0006431902235086027, "fp16_scale": 1.0, "global_step": 2, "grad_norm": 5.936799845967266, "learning_rate": 4.25531914893617e-08, "loss": 0.7339, "step": 2 }, { "ETA": 6.9, "epoch": 0.000964785335262904, "fp16_scale": 1.0, "global_step": 3, "grad_norm": 8.761149505364427, "learning_rate": 6.382978723404254e-08, "loss": 0.8264, "step": 3 }, { "ETA": 6.6, "epoch": 0.0012863804470172053, "fp16_scale": 1.0, "global_step": 4, "grad_norm": 8.798292347386411, "learning_rate": 8.51063829787234e-08, "loss": 0.8467, "step": 4 }, { "ETA": 6.56, "epoch": 0.0016079755587715067, "fp16_scale": 1.0, "global_step": 5, "grad_norm": 8.372692731162083, "learning_rate": 1.0638297872340425e-07, "loss": 0.7485, "step": 5 }, { "ETA": 6.47, "epoch": 0.001929570670525808, "fp16_scale": 1.0, "global_step": 6, "grad_norm": 9.34577570201643, "learning_rate": 1.2765957446808508e-07, "loss": 0.8435, "step": 6 }, { "ETA": 6.36, "epoch": 0.0022511657822801095, "fp16_scale": 1.0, "global_step": 7, "grad_norm": 6.855807739840747, "learning_rate": 1.4893617021276595e-07, "loss": 0.7391, "step": 7 }, { "ETA": 6.56, "epoch": 0.0025727608940344106, "fp16_scale": 1.0, "global_step": 8, "grad_norm": 8.247247002061396, "learning_rate": 1.702127659574468e-07, "loss": 0.7966, "step": 8 }, { "ETA": 6.21, "epoch": 0.002894356005788712, "fp16_scale": 1.0, "global_step": 9, "grad_norm": 11.091321882456889, "learning_rate": 1.9148936170212765e-07, "loss": 0.675, "step": 9 }, { "ETA": 6.21, "epoch": 0.0032159511175430134, "fp16_scale": 1.0, "global_step": 10, "grad_norm": 7.801672011661994, "learning_rate": 2.127659574468085e-07, "loss": 0.8447, "step": 10 }, { "ETA": 6.2, "epoch": 0.0035375462292973146, "fp16_scale": 1.0, "global_step": 11, "grad_norm": 7.778967208306511, "learning_rate": 2.3404255319148937e-07, "loss": 0.7343, "step": 11 }, { "ETA": 5.9, "epoch": 0.003859141341051616, "fp16_scale": 1.0, "global_step": 12, "grad_norm": 11.32488432224942, "learning_rate": 2.5531914893617016e-07, "loss": 0.7279, "step": 12 }, { "ETA": 5.92, "epoch": 0.004180736452805918, "fp16_scale": 1.0, "global_step": 13, "grad_norm": 7.951145078377419, "learning_rate": 2.7659574468085106e-07, "loss": 0.7681, "step": 13 }, { "ETA": 5.95, "epoch": 0.004502331564560219, "fp16_scale": 1.0, "global_step": 14, "grad_norm": 8.518318008722654, "learning_rate": 2.978723404255319e-07, "loss": 0.7737, "step": 14 }, { "ETA": 5.98, "epoch": 0.00482392667631452, "fp16_scale": 1.0, "global_step": 15, "grad_norm": 8.733510675457772, "learning_rate": 3.1914893617021275e-07, "loss": 0.763, "step": 15 }, { "ETA": 5.98, "epoch": 0.005145521788068821, "fp16_scale": 1.0, "global_step": 16, "grad_norm": 6.760829110381652, "learning_rate": 3.404255319148936e-07, "loss": 0.7064, "step": 16 }, { "ETA": 5.89, "epoch": 0.005467116899823122, "fp16_scale": 1.0, "global_step": 17, "grad_norm": 7.433915229026562, "learning_rate": 3.617021276595745e-07, "loss": 0.5313, "step": 17 }, { "ETA": 5.98, "epoch": 0.005788712011577424, "fp16_scale": 1.0, "global_step": 18, "grad_norm": 6.2692886913904236, "learning_rate": 3.829787234042553e-07, "loss": 0.7495, "step": 18 }, { "ETA": 5.98, "epoch": 0.006110307123331726, "fp16_scale": 1.0, "global_step": 19, "grad_norm": 7.660828413435692, "learning_rate": 4.0425531914893614e-07, "loss": 0.7335, "step": 19 }, { "ETA": 5.99, "epoch": 0.006431902235086027, "fp16_scale": 1.0, "global_step": 20, "grad_norm": 7.506163600068185, "learning_rate": 4.25531914893617e-07, "loss": 0.7548, "step": 20 }, { "ETA": 6.01, "epoch": 0.006753497346840328, "fp16_scale": 1.0, "global_step": 21, "grad_norm": 8.01415459477147, "learning_rate": 4.4680851063829783e-07, "loss": 0.797, "step": 21 }, { "ETA": 6.0, "epoch": 0.007075092458594629, "fp16_scale": 1.0, "global_step": 22, "grad_norm": 6.070403303562782, "learning_rate": 4.6808510638297873e-07, "loss": 0.7009, "step": 22 }, { "ETA": 5.98, "epoch": 0.00739668757034893, "fp16_scale": 1.0, "global_step": 23, "grad_norm": 7.036884976599998, "learning_rate": 4.893617021276595e-07, "loss": 0.6791, "step": 23 }, { "ETA": 6.0, "epoch": 0.007718282682103232, "fp16_scale": 1.0, "global_step": 24, "grad_norm": 6.889849361911478, "learning_rate": 5.106382978723403e-07, "loss": 0.7487, "step": 24 }, { "ETA": 6.01, "epoch": 0.008039877793857533, "fp16_scale": 1.0, "global_step": 25, "grad_norm": 6.628636420052721, "learning_rate": 5.319148936170212e-07, "loss": 0.6991, "step": 25 }, { "ETA": 6.05, "epoch": 0.008361472905611836, "fp16_scale": 1.0, "global_step": 26, "grad_norm": 5.624553775893587, "learning_rate": 5.531914893617021e-07, "loss": 0.6937, "step": 26 }, { "ETA": 5.92, "epoch": 0.008683068017366137, "fp16_scale": 1.0, "global_step": 27, "grad_norm": 5.196623996489182, "learning_rate": 5.74468085106383e-07, "loss": 0.5243, "step": 27 }, { "ETA": 5.94, "epoch": 0.009004663129120438, "fp16_scale": 1.0, "global_step": 28, "grad_norm": 5.11030998557107, "learning_rate": 5.957446808510638e-07, "loss": 0.6369, "step": 28 }, { "ETA": 5.96, "epoch": 0.009326258240874739, "fp16_scale": 1.0, "global_step": 29, "grad_norm": 3.7503987206102933, "learning_rate": 6.170212765957446e-07, "loss": 0.6579, "step": 29 }, { "ETA": 5.97, "epoch": 0.00964785335262904, "fp16_scale": 1.0, "global_step": 30, "grad_norm": 3.6997926154411065, "learning_rate": 6.382978723404255e-07, "loss": 0.6319, "step": 30 }, { "ETA": 5.96, "epoch": 0.009969448464383341, "fp16_scale": 1.0, "global_step": 31, "grad_norm": 3.325397798658775, "learning_rate": 6.595744680851063e-07, "loss": 0.6509, "step": 31 }, { "ETA": 5.97, "epoch": 0.010291043576137643, "fp16_scale": 1.0, "global_step": 32, "grad_norm": 3.5979872531541917, "learning_rate": 6.808510638297872e-07, "loss": 0.6236, "step": 32 }, { "ETA": 6.0, "epoch": 0.010612638687891944, "fp16_scale": 1.0, "global_step": 33, "grad_norm": 3.3313648341845217, "learning_rate": 7.021276595744681e-07, "loss": 0.5944, "step": 33 }, { "ETA": 6.03, "epoch": 0.010934233799646245, "fp16_scale": 1.0, "global_step": 34, "grad_norm": 2.6531843559585058, "learning_rate": 7.23404255319149e-07, "loss": 0.6317, "step": 34 }, { "ETA": 6.02, "epoch": 0.011255828911400546, "fp16_scale": 1.0, "global_step": 35, "grad_norm": 2.651824591691451, "learning_rate": 7.446808510638297e-07, "loss": 0.5799, "step": 35 }, { "ETA": 6.03, "epoch": 0.011577424023154847, "fp16_scale": 1.0, "global_step": 36, "grad_norm": 3.0054745271450587, "learning_rate": 7.659574468085106e-07, "loss": 0.575, "step": 36 }, { "ETA": 6.02, "epoch": 0.01189901913490915, "fp16_scale": 1.0, "global_step": 37, "grad_norm": 2.7676225818699907, "learning_rate": 7.872340425531915e-07, "loss": 0.5693, "step": 37 }, { "ETA": 5.94, "epoch": 0.012220614246663451, "fp16_scale": 1.0, "global_step": 38, "grad_norm": 4.307955418017736, "learning_rate": 8.085106382978723e-07, "loss": 0.549, "step": 38 }, { "ETA": 5.94, "epoch": 0.012542209358417752, "fp16_scale": 1.0, "global_step": 39, "grad_norm": 2.7592638323739282, "learning_rate": 8.297872340425532e-07, "loss": 0.6385, "step": 39 }, { "ETA": 5.94, "epoch": 0.012863804470172054, "fp16_scale": 1.0, "global_step": 40, "grad_norm": 3.1937611077592183, "learning_rate": 8.51063829787234e-07, "loss": 0.6341, "step": 40 }, { "ETA": 5.94, "epoch": 0.013185399581926355, "fp16_scale": 1.0, "global_step": 41, "grad_norm": 2.3642106915091006, "learning_rate": 8.723404255319149e-07, "loss": 0.5279, "step": 41 }, { "ETA": 5.94, "epoch": 0.013506994693680656, "fp16_scale": 1.0, "global_step": 42, "grad_norm": 2.4889004111147726, "learning_rate": 8.936170212765957e-07, "loss": 0.6534, "step": 42 }, { "ETA": 5.86, "epoch": 0.013828589805434957, "fp16_scale": 1.0, "global_step": 43, "grad_norm": 2.7252878090073382, "learning_rate": 9.148936170212766e-07, "loss": 0.5023, "step": 43 }, { "ETA": 5.89, "epoch": 0.014150184917189258, "fp16_scale": 1.0, "global_step": 44, "grad_norm": 2.2704994128835616, "learning_rate": 9.361702127659575e-07, "loss": 0.596, "step": 44 }, { "ETA": 5.9, "epoch": 0.01447178002894356, "fp16_scale": 1.0, "global_step": 45, "grad_norm": 2.1776982157402784, "learning_rate": 9.574468085106384e-07, "loss": 0.5203, "step": 45 }, { "ETA": 5.91, "epoch": 0.01479337514069786, "fp16_scale": 1.0, "global_step": 46, "grad_norm": 2.3223571282145277, "learning_rate": 9.78723404255319e-07, "loss": 0.6346, "step": 46 }, { "ETA": 5.85, "epoch": 0.015114970252452163, "fp16_scale": 1.0, "global_step": 47, "grad_norm": 2.553678306991624, "learning_rate": 1e-06, "loss": 0.4674, "step": 47 }, { "ETA": 5.85, "epoch": 0.015436565364206465, "fp16_scale": 1.0, "global_step": 48, "grad_norm": 2.197430468099514, "learning_rate": 1.0212765957446806e-06, "loss": 0.5526, "step": 48 }, { "ETA": 5.85, "epoch": 0.015758160475960764, "fp16_scale": 1.0, "global_step": 49, "grad_norm": 2.525799839180918, "learning_rate": 1.0425531914893618e-06, "loss": 0.5723, "step": 49 }, { "ETA": 5.85, "epoch": 0.016079755587715065, "fp16_scale": 1.0, "global_step": 50, "grad_norm": 2.2984636311934863, "learning_rate": 1.0638297872340424e-06, "loss": 0.5411, "step": 50 }, { "ETA": 5.85, "epoch": 0.016401350699469366, "fp16_scale": 1.0, "global_step": 51, "grad_norm": 2.1296579071495185, "learning_rate": 1.0851063829787233e-06, "loss": 0.5586, "step": 51 }, { "ETA": 5.79, "epoch": 0.01672294581122367, "fp16_scale": 1.0, "global_step": 52, "grad_norm": 2.3505923179519965, "learning_rate": 1.1063829787234042e-06, "loss": 0.4382, "step": 52 }, { "ETA": 5.81, "epoch": 0.017044540922977972, "fp16_scale": 1.0, "global_step": 53, "grad_norm": 2.17148946876222, "learning_rate": 1.127659574468085e-06, "loss": 0.5376, "step": 53 }, { "ETA": 5.8, "epoch": 0.017366136034732273, "fp16_scale": 1.0, "global_step": 54, "grad_norm": 1.9342880505071298, "learning_rate": 1.148936170212766e-06, "loss": 0.5249, "step": 54 }, { "ETA": 5.81, "epoch": 0.017687731146486575, "fp16_scale": 1.0, "global_step": 55, "grad_norm": 1.9801427724408849, "learning_rate": 1.1702127659574467e-06, "loss": 0.5198, "step": 55 }, { "ETA": 5.81, "epoch": 0.018009326258240876, "fp16_scale": 1.0, "global_step": 56, "grad_norm": 2.4245238767570356, "learning_rate": 1.1914893617021276e-06, "loss": 0.5221, "step": 56 }, { "ETA": 5.81, "epoch": 0.018330921369995177, "fp16_scale": 1.0, "global_step": 57, "grad_norm": 2.2187104912844138, "learning_rate": 1.2127659574468085e-06, "loss": 0.5299, "step": 57 }, { "ETA": 5.81, "epoch": 0.018652516481749478, "fp16_scale": 1.0, "global_step": 58, "grad_norm": 1.9435727492977588, "learning_rate": 1.2340425531914892e-06, "loss": 0.4573, "step": 58 }, { "ETA": 5.82, "epoch": 0.01897411159350378, "fp16_scale": 1.0, "global_step": 59, "grad_norm": 2.1196247403843183, "learning_rate": 1.2553191489361701e-06, "loss": 0.5041, "step": 59 }, { "ETA": 5.83, "epoch": 0.01929570670525808, "fp16_scale": 1.0, "global_step": 60, "grad_norm": 2.1089989366123785, "learning_rate": 1.276595744680851e-06, "loss": 0.5424, "step": 60 }, { "ETA": 5.85, "epoch": 0.01961730181701238, "fp16_scale": 1.0, "global_step": 61, "grad_norm": 2.2338975030115864, "learning_rate": 1.297872340425532e-06, "loss": 0.4935, "step": 61 }, { "ETA": 5.85, "epoch": 0.019938896928766683, "fp16_scale": 1.0, "global_step": 62, "grad_norm": 2.0431724462748213, "learning_rate": 1.3191489361702126e-06, "loss": 0.5799, "step": 62 }, { "ETA": 5.85, "epoch": 0.020260492040520984, "fp16_scale": 1.0, "global_step": 63, "grad_norm": 2.4551991336746606, "learning_rate": 1.3404255319148935e-06, "loss": 0.6281, "step": 63 }, { "ETA": 5.8, "epoch": 0.020582087152275285, "fp16_scale": 1.0, "global_step": 64, "grad_norm": 3.188788140029998, "learning_rate": 1.3617021276595744e-06, "loss": 0.5042, "step": 64 }, { "ETA": 5.81, "epoch": 0.020903682264029586, "fp16_scale": 1.0, "global_step": 65, "grad_norm": 2.0250469623805225, "learning_rate": 1.3829787234042553e-06, "loss": 0.5447, "step": 65 }, { "ETA": 5.81, "epoch": 0.021225277375783887, "fp16_scale": 1.0, "global_step": 66, "grad_norm": 2.186476354070767, "learning_rate": 1.4042553191489362e-06, "loss": 0.604, "step": 66 }, { "ETA": 5.81, "epoch": 0.02154687248753819, "fp16_scale": 1.0, "global_step": 67, "grad_norm": 1.9016721591480255, "learning_rate": 1.4255319148936169e-06, "loss": 0.4654, "step": 67 }, { "ETA": 5.81, "epoch": 0.02186846759929249, "fp16_scale": 1.0, "global_step": 68, "grad_norm": 1.8932285445408374, "learning_rate": 1.446808510638298e-06, "loss": 0.5115, "step": 68 }, { "ETA": 5.81, "epoch": 0.02219006271104679, "fp16_scale": 1.0, "global_step": 69, "grad_norm": 2.138350825095684, "learning_rate": 1.4680851063829787e-06, "loss": 0.599, "step": 69 }, { "ETA": 5.82, "epoch": 0.022511657822801092, "fp16_scale": 1.0, "global_step": 70, "grad_norm": 2.011538617509216, "learning_rate": 1.4893617021276594e-06, "loss": 0.516, "step": 70 }, { "ETA": 5.82, "epoch": 0.022833252934555393, "fp16_scale": 1.0, "global_step": 71, "grad_norm": 2.364474781341946, "learning_rate": 1.5106382978723405e-06, "loss": 0.5453, "step": 71 }, { "ETA": 5.82, "epoch": 0.023154848046309694, "fp16_scale": 1.0, "global_step": 72, "grad_norm": 1.842910522233911, "learning_rate": 1.5319148936170212e-06, "loss": 0.476, "step": 72 }, { "ETA": 5.82, "epoch": 0.023476443158064, "fp16_scale": 1.0, "global_step": 73, "grad_norm": 1.9116819343832798, "learning_rate": 1.5531914893617019e-06, "loss": 0.5051, "step": 73 }, { "ETA": 5.77, "epoch": 0.0237980382698183, "fp16_scale": 1.0, "global_step": 74, "grad_norm": 3.27843789986936, "learning_rate": 1.574468085106383e-06, "loss": 0.4861, "step": 74 }, { "ETA": 5.77, "epoch": 0.0241196333815726, "fp16_scale": 1.0, "global_step": 75, "grad_norm": 2.38109700900136, "learning_rate": 1.5957446808510637e-06, "loss": 0.5261, "step": 75 }, { "ETA": 5.77, "epoch": 0.024441228493326903, "fp16_scale": 1.0, "global_step": 76, "grad_norm": 2.23743872021617, "learning_rate": 1.6170212765957446e-06, "loss": 0.5736, "step": 76 }, { "ETA": 5.72, "epoch": 0.024762823605081204, "fp16_scale": 1.0, "global_step": 77, "grad_norm": 2.8559459749027383, "learning_rate": 1.6382978723404255e-06, "loss": 0.4531, "step": 77 }, { "ETA": 5.73, "epoch": 0.025084418716835505, "fp16_scale": 1.0, "global_step": 78, "grad_norm": 1.9454163160560185, "learning_rate": 1.6595744680851064e-06, "loss": 0.5391, "step": 78 }, { "ETA": 5.69, "epoch": 0.025406013828589806, "fp16_scale": 1.0, "global_step": 79, "grad_norm": 2.7935117012479136, "learning_rate": 1.6808510638297873e-06, "loss": 0.468, "step": 79 }, { "ETA": 5.71, "epoch": 0.025727608940344107, "fp16_scale": 1.0, "global_step": 80, "grad_norm": 2.0607040982783893, "learning_rate": 1.702127659574468e-06, "loss": 0.4884, "step": 80 }, { "ETA": 5.71, "epoch": 0.02604920405209841, "fp16_scale": 1.0, "global_step": 81, "grad_norm": 1.9616817221481178, "learning_rate": 1.7234042553191488e-06, "loss": 0.5125, "step": 81 }, { "ETA": 5.68, "epoch": 0.02637079916385271, "fp16_scale": 1.0, "global_step": 82, "grad_norm": 2.8684132922538823, "learning_rate": 1.7446808510638297e-06, "loss": 0.4395, "step": 82 }, { "ETA": 5.64, "epoch": 0.02669239427560701, "fp16_scale": 1.0, "global_step": 83, "grad_norm": 2.462224589767338, "learning_rate": 1.7659574468085106e-06, "loss": 0.4355, "step": 83 }, { "ETA": 5.64, "epoch": 0.027013989387361312, "fp16_scale": 1.0, "global_step": 84, "grad_norm": 2.2938452504024056, "learning_rate": 1.7872340425531913e-06, "loss": 0.5227, "step": 84 }, { "ETA": 5.64, "epoch": 0.027335584499115613, "fp16_scale": 1.0, "global_step": 85, "grad_norm": 2.1576688140194147, "learning_rate": 1.8085106382978722e-06, "loss": 0.5308, "step": 85 }, { "ETA": 5.64, "epoch": 0.027657179610869914, "fp16_scale": 1.0, "global_step": 86, "grad_norm": 2.120174390807831, "learning_rate": 1.8297872340425531e-06, "loss": 0.5216, "step": 86 }, { "ETA": 5.64, "epoch": 0.027978774722624215, "fp16_scale": 1.0, "global_step": 87, "grad_norm": 2.0757019848487057, "learning_rate": 1.8510638297872338e-06, "loss": 0.51, "step": 87 }, { "ETA": 5.64, "epoch": 0.028300369834378516, "fp16_scale": 1.0, "global_step": 88, "grad_norm": 2.1525748432569047, "learning_rate": 1.872340425531915e-06, "loss": 0.5147, "step": 88 }, { "ETA": 5.65, "epoch": 0.028621964946132818, "fp16_scale": 1.0, "global_step": 89, "grad_norm": 2.191002649456998, "learning_rate": 1.8936170212765956e-06, "loss": 0.5744, "step": 89 }, { "ETA": 5.67, "epoch": 0.02894356005788712, "fp16_scale": 1.0, "global_step": 90, "grad_norm": 2.0169223118103283, "learning_rate": 1.9148936170212767e-06, "loss": 0.5322, "step": 90 }, { "ETA": 5.67, "epoch": 0.02926515516964142, "fp16_scale": 1.0, "global_step": 91, "grad_norm": 2.043929358251754, "learning_rate": 1.936170212765957e-06, "loss": 0.5384, "step": 91 }, { "ETA": 5.68, "epoch": 0.02958675028139572, "fp16_scale": 1.0, "global_step": 92, "grad_norm": 1.9056931212133323, "learning_rate": 1.957446808510638e-06, "loss": 0.5185, "step": 92 }, { "ETA": 5.68, "epoch": 0.029908345393150026, "fp16_scale": 1.0, "global_step": 93, "grad_norm": 2.307999687316851, "learning_rate": 1.978723404255319e-06, "loss": 0.5083, "step": 93 }, { "ETA": 5.68, "epoch": 0.030229940504904327, "fp16_scale": 1.0, "global_step": 94, "grad_norm": 2.3069773784363874, "learning_rate": 2e-06, "loss": 0.4874, "step": 94 }, { "ETA": 5.68, "epoch": 0.030551535616658628, "fp16_scale": 1.0, "global_step": 95, "grad_norm": 2.1375152636420762, "learning_rate": 1.999999457130956e-06, "loss": 0.5308, "step": 95 }, { "ETA": 5.68, "epoch": 0.03087313072841293, "fp16_scale": 1.0, "global_step": 96, "grad_norm": 1.779984554116194, "learning_rate": 1.999997828524414e-06, "loss": 0.4612, "step": 96 }, { "ETA": 5.69, "epoch": 0.03119472584016723, "fp16_scale": 1.0, "global_step": 97, "grad_norm": 1.8897525194740394, "learning_rate": 1.999995114182142e-06, "loss": 0.4783, "step": 97 }, { "ETA": 5.7, "epoch": 0.03151632095192153, "fp16_scale": 1.0, "global_step": 98, "grad_norm": 2.140424509099989, "learning_rate": 1.999991314107087e-06, "loss": 0.6014, "step": 98 }, { "ETA": 5.69, "epoch": 0.03183791606367583, "fp16_scale": 1.0, "global_step": 99, "grad_norm": 2.0231806427993053, "learning_rate": 1.9999864283033744e-06, "loss": 0.5012, "step": 99 }, { "ETA": 5.7, "epoch": 0.03215951117543013, "fp16_scale": 1.0, "global_step": 100, "grad_norm": 2.0863695551562, "learning_rate": 1.99998045677631e-06, "loss": 0.5462, "step": 100 }, { "ETA": 5.66, "epoch": 0.03248110628718443, "fp16_scale": 1.0, "global_step": 101, "grad_norm": 2.437841320875372, "learning_rate": 1.999973399532377e-06, "loss": 0.446, "step": 101 }, { "ETA": 5.67, "epoch": 0.03280270139893873, "fp16_scale": 1.0, "global_step": 102, "grad_norm": 2.112285993018359, "learning_rate": 1.999965256579237e-06, "loss": 0.4646, "step": 102 }, { "ETA": 5.67, "epoch": 0.03312429651069304, "fp16_scale": 1.0, "global_step": 103, "grad_norm": 2.012409806465424, "learning_rate": 1.9999560279257314e-06, "loss": 0.425, "step": 103 }, { "ETA": 5.67, "epoch": 0.03344589162244734, "fp16_scale": 1.0, "global_step": 104, "grad_norm": 1.92659649084298, "learning_rate": 1.9999457135818805e-06, "loss": 0.5055, "step": 104 }, { "ETA": 5.67, "epoch": 0.03376748673420164, "fp16_scale": 1.0, "global_step": 105, "grad_norm": 1.9258326937148058, "learning_rate": 1.9999343135588825e-06, "loss": 0.5538, "step": 105 }, { "ETA": 5.68, "epoch": 0.034089081845955944, "fp16_scale": 1.0, "global_step": 106, "grad_norm": 2.092933785114832, "learning_rate": 1.9999218278691153e-06, "loss": 0.4819, "step": 106 }, { "ETA": 5.69, "epoch": 0.034410676957710246, "fp16_scale": 1.0, "global_step": 107, "grad_norm": 1.90548945056872, "learning_rate": 1.999908256526135e-06, "loss": 0.4903, "step": 107 }, { "ETA": 5.69, "epoch": 0.03473227206946455, "fp16_scale": 1.0, "global_step": 108, "grad_norm": 2.149863310234961, "learning_rate": 1.9998935995446764e-06, "loss": 0.5083, "step": 108 }, { "ETA": 5.69, "epoch": 0.03505386718121885, "fp16_scale": 1.0, "global_step": 109, "grad_norm": 2.019334705889536, "learning_rate": 1.999877856940653e-06, "loss": 0.4884, "step": 109 }, { "ETA": 5.69, "epoch": 0.03537546229297315, "fp16_scale": 1.0, "global_step": 110, "grad_norm": 2.0805274339165, "learning_rate": 1.9998610287311573e-06, "loss": 0.4998, "step": 110 }, { "ETA": 5.68, "epoch": 0.03569705740472745, "fp16_scale": 1.0, "global_step": 111, "grad_norm": 1.854134725693307, "learning_rate": 1.9998431149344605e-06, "loss": 0.4633, "step": 111 }, { "ETA": 5.65, "epoch": 0.03601865251648175, "fp16_scale": 1.0, "global_step": 112, "grad_norm": 2.435635768617461, "learning_rate": 1.999824115570012e-06, "loss": 0.4366, "step": 112 }, { "ETA": 5.63, "epoch": 0.03634024762823605, "fp16_scale": 1.0, "global_step": 113, "grad_norm": 2.71798042302074, "learning_rate": 1.9998040306584397e-06, "loss": 0.4651, "step": 113 }, { "ETA": 5.6, "epoch": 0.036661842739990354, "fp16_scale": 1.0, "global_step": 114, "grad_norm": 2.557939383567869, "learning_rate": 1.999782860221552e-06, "loss": 0.425, "step": 114 }, { "ETA": 5.6, "epoch": 0.036983437851744655, "fp16_scale": 1.0, "global_step": 115, "grad_norm": 1.9245468047531342, "learning_rate": 1.999760604282333e-06, "loss": 0.4295, "step": 115 }, { "ETA": 5.61, "epoch": 0.037305032963498956, "fp16_scale": 1.0, "global_step": 116, "grad_norm": 1.8876528914974746, "learning_rate": 1.9997372628649476e-06, "loss": 0.4415, "step": 116 }, { "ETA": 5.61, "epoch": 0.03762662807525326, "fp16_scale": 1.0, "global_step": 117, "grad_norm": 1.9802708149084507, "learning_rate": 1.999712835994738e-06, "loss": 0.5122, "step": 117 }, { "ETA": 5.61, "epoch": 0.03794822318700756, "fp16_scale": 1.0, "global_step": 118, "grad_norm": 2.1211433879182144, "learning_rate": 1.9996873236982257e-06, "loss": 0.5276, "step": 118 }, { "ETA": 5.61, "epoch": 0.03826981829876186, "fp16_scale": 1.0, "global_step": 119, "grad_norm": 1.621281570789123, "learning_rate": 1.9996607260031105e-06, "loss": 0.4932, "step": 119 }, { "ETA": 5.61, "epoch": 0.03859141341051616, "fp16_scale": 1.0, "global_step": 120, "grad_norm": 1.9653619168923202, "learning_rate": 1.9996330429382703e-06, "loss": 0.5654, "step": 120 }, { "ETA": 5.62, "epoch": 0.03891300852227046, "fp16_scale": 1.0, "global_step": 121, "grad_norm": 2.2123648273822942, "learning_rate": 1.9996042745337615e-06, "loss": 0.4775, "step": 121 }, { "ETA": 5.62, "epoch": 0.03923460363402476, "fp16_scale": 1.0, "global_step": 122, "grad_norm": 1.9972465034521092, "learning_rate": 1.9995744208208192e-06, "loss": 0.4768, "step": 122 }, { "ETA": 5.62, "epoch": 0.039556198745779064, "fp16_scale": 1.0, "global_step": 123, "grad_norm": 2.0828761416592663, "learning_rate": 1.999543481831857e-06, "loss": 0.5236, "step": 123 }, { "ETA": 5.63, "epoch": 0.039877793857533365, "fp16_scale": 1.0, "global_step": 124, "grad_norm": 2.262870812400955, "learning_rate": 1.999511457600466e-06, "loss": 0.5392, "step": 124 }, { "ETA": 5.63, "epoch": 0.04019938896928767, "fp16_scale": 1.0, "global_step": 125, "grad_norm": 2.006385958193691, "learning_rate": 1.9994783481614164e-06, "loss": 0.4454, "step": 125 }, { "ETA": 5.62, "epoch": 0.04052098408104197, "fp16_scale": 1.0, "global_step": 126, "grad_norm": 1.9169684519984513, "learning_rate": 1.9994441535506564e-06, "loss": 0.409, "step": 126 }, { "ETA": 5.63, "epoch": 0.04084257919279627, "fp16_scale": 1.0, "global_step": 127, "grad_norm": 1.7978841639226444, "learning_rate": 1.9994088738053125e-06, "loss": 0.5075, "step": 127 }, { "ETA": 5.63, "epoch": 0.04116417430455057, "fp16_scale": 1.0, "global_step": 128, "grad_norm": 2.281911267986894, "learning_rate": 1.999372508963689e-06, "loss": 0.5013, "step": 128 }, { "ETA": 5.62, "epoch": 0.04148576941630487, "fp16_scale": 1.0, "global_step": 129, "grad_norm": 2.0421301534446243, "learning_rate": 1.999335059065269e-06, "loss": 0.4435, "step": 129 }, { "ETA": 5.62, "epoch": 0.04180736452805917, "fp16_scale": 1.0, "global_step": 130, "grad_norm": 1.930763418660808, "learning_rate": 1.9992965241507127e-06, "loss": 0.4362, "step": 130 }, { "ETA": 5.6, "epoch": 0.042128959639813474, "fp16_scale": 1.0, "global_step": 131, "grad_norm": 2.1611057009273718, "learning_rate": 1.9992569042618594e-06, "loss": 0.4057, "step": 131 }, { "ETA": 5.6, "epoch": 0.042450554751567775, "fp16_scale": 1.0, "global_step": 132, "grad_norm": 2.068532510448745, "learning_rate": 1.999216199441726e-06, "loss": 0.5291, "step": 132 }, { "ETA": 5.61, "epoch": 0.042772149863322076, "fp16_scale": 1.0, "global_step": 133, "grad_norm": 2.1349463536595232, "learning_rate": 1.999174409734507e-06, "loss": 0.4926, "step": 133 }, { "ETA": 5.61, "epoch": 0.04309374497507638, "fp16_scale": 1.0, "global_step": 134, "grad_norm": 1.7812791752819839, "learning_rate": 1.9991315351855745e-06, "loss": 0.3955, "step": 134 }, { "ETA": 5.62, "epoch": 0.04341534008683068, "fp16_scale": 1.0, "global_step": 135, "grad_norm": 2.2711082613133877, "learning_rate": 1.99908757584148e-06, "loss": 0.5022, "step": 135 }, { "ETA": 5.61, "epoch": 0.04373693519858498, "fp16_scale": 1.0, "global_step": 136, "grad_norm": 2.2507108902753004, "learning_rate": 1.9990425317499516e-06, "loss": 0.4648, "step": 136 }, { "ETA": 5.61, "epoch": 0.04405853031033928, "fp16_scale": 1.0, "global_step": 137, "grad_norm": 2.1452354144039996, "learning_rate": 1.998996402959895e-06, "loss": 0.4525, "step": 137 }, { "ETA": 5.61, "epoch": 0.04438012542209358, "fp16_scale": 1.0, "global_step": 138, "grad_norm": 2.0144953999608965, "learning_rate": 1.9989491895213946e-06, "loss": 0.4986, "step": 138 }, { "ETA": 5.59, "epoch": 0.04470172053384788, "fp16_scale": 1.0, "global_step": 139, "grad_norm": 2.229946650769043, "learning_rate": 1.998900891485711e-06, "loss": 0.3849, "step": 139 }, { "ETA": 5.59, "epoch": 0.045023315645602184, "fp16_scale": 1.0, "global_step": 140, "grad_norm": 2.0253377758809625, "learning_rate": 1.998851508905284e-06, "loss": 0.581, "step": 140 }, { "ETA": 5.6, "epoch": 0.045344910757356485, "fp16_scale": 1.0, "global_step": 141, "grad_norm": 2.206479589908534, "learning_rate": 1.9988010418337304e-06, "loss": 0.4867, "step": 141 }, { "ETA": 5.61, "epoch": 0.045666505869110786, "fp16_scale": 1.0, "global_step": 142, "grad_norm": 1.7500501463013969, "learning_rate": 1.998749490325843e-06, "loss": 0.4973, "step": 142 }, { "ETA": 5.61, "epoch": 0.04598810098086509, "fp16_scale": 1.0, "global_step": 143, "grad_norm": 1.6690448702127838, "learning_rate": 1.998696854437594e-06, "loss": 0.4401, "step": 143 }, { "ETA": 5.61, "epoch": 0.04630969609261939, "fp16_scale": 1.0, "global_step": 144, "grad_norm": 1.9301595047071682, "learning_rate": 1.998643134226132e-06, "loss": 0.4081, "step": 144 }, { "ETA": 5.61, "epoch": 0.0466312912043737, "fp16_scale": 1.0, "global_step": 145, "grad_norm": 2.031377215245968, "learning_rate": 1.998588329749783e-06, "loss": 0.5083, "step": 145 }, { "ETA": 5.61, "epoch": 0.046952886316128, "fp16_scale": 1.0, "global_step": 146, "grad_norm": 1.9480273584696632, "learning_rate": 1.998532441068051e-06, "loss": 0.4895, "step": 146 }, { "ETA": 5.61, "epoch": 0.0472744814278823, "fp16_scale": 1.0, "global_step": 147, "grad_norm": 1.940140543978122, "learning_rate": 1.9984754682416157e-06, "loss": 0.5188, "step": 147 }, { "ETA": 5.59, "epoch": 0.0475960765396366, "fp16_scale": 1.0, "global_step": 148, "grad_norm": 2.566694912162044, "learning_rate": 1.998417411332335e-06, "loss": 0.4565, "step": 148 }, { "ETA": 5.6, "epoch": 0.0479176716513909, "fp16_scale": 1.0, "global_step": 149, "grad_norm": 2.1015541461979628, "learning_rate": 1.9983582704032434e-06, "loss": 0.466, "step": 149 }, { "ETA": 5.61, "epoch": 0.0482392667631452, "fp16_scale": 1.0, "global_step": 150, "grad_norm": 2.1246119471767777, "learning_rate": 1.9982980455185523e-06, "loss": 0.5224, "step": 150 }, { "ETA": 5.59, "epoch": 0.048560861874899504, "fp16_scale": 1.0, "global_step": 151, "grad_norm": 2.302387604082975, "learning_rate": 1.9982367367436505e-06, "loss": 0.4105, "step": 151 }, { "ETA": 5.57, "epoch": 0.048882456986653805, "fp16_scale": 1.0, "global_step": 152, "grad_norm": 2.2524238140564226, "learning_rate": 1.998174344145103e-06, "loss": 0.4797, "step": 152 }, { "ETA": 5.57, "epoch": 0.049204052098408106, "fp16_scale": 1.0, "global_step": 153, "grad_norm": 1.8770814388726234, "learning_rate": 1.9981108677906516e-06, "loss": 0.435, "step": 153 }, { "ETA": 5.57, "epoch": 0.04952564721016241, "fp16_scale": 1.0, "global_step": 154, "grad_norm": 2.2883451205924352, "learning_rate": 1.9980463077492156e-06, "loss": 0.5423, "step": 154 }, { "ETA": 5.55, "epoch": 0.04984724232191671, "fp16_scale": 1.0, "global_step": 155, "grad_norm": 2.0598058556836634, "learning_rate": 1.9979806640908904e-06, "loss": 0.4055, "step": 155 }, { "ETA": 5.55, "epoch": 0.05016883743367101, "fp16_scale": 1.0, "global_step": 156, "grad_norm": 2.151177357556838, "learning_rate": 1.997913936886947e-06, "loss": 0.541, "step": 156 }, { "ETA": 5.55, "epoch": 0.05049043254542531, "fp16_scale": 1.0, "global_step": 157, "grad_norm": 1.9583108423046163, "learning_rate": 1.997846126209834e-06, "loss": 0.5167, "step": 157 }, { "ETA": 5.55, "epoch": 0.05081202765717961, "fp16_scale": 1.0, "global_step": 158, "grad_norm": 1.7191485045098294, "learning_rate": 1.997777232133176e-06, "loss": 0.48, "step": 158 }, { "ETA": 5.55, "epoch": 0.05113362276893391, "fp16_scale": 1.0, "global_step": 159, "grad_norm": 1.871417794238402, "learning_rate": 1.997707254731775e-06, "loss": 0.4349, "step": 159 }, { "ETA": 5.55, "epoch": 0.051455217880688214, "fp16_scale": 1.0, "global_step": 160, "grad_norm": 1.8506517380221816, "learning_rate": 1.997636194081606e-06, "loss": 0.4633, "step": 160 }, { "ETA": 5.56, "epoch": 0.051776812992442516, "fp16_scale": 1.0, "global_step": 161, "grad_norm": 2.248773620263743, "learning_rate": 1.997564050259824e-06, "loss": 0.4719, "step": 161 }, { "ETA": 5.54, "epoch": 0.05209840810419682, "fp16_scale": 1.0, "global_step": 162, "grad_norm": 2.25152787158001, "learning_rate": 1.997490823344758e-06, "loss": 0.4203, "step": 162 }, { "ETA": 5.54, "epoch": 0.05242000321595112, "fp16_scale": 1.0, "global_step": 163, "grad_norm": 1.8999498537580275, "learning_rate": 1.9974165134159125e-06, "loss": 0.5587, "step": 163 }, { "ETA": 5.54, "epoch": 0.05274159832770542, "fp16_scale": 1.0, "global_step": 164, "grad_norm": 1.9716592311180214, "learning_rate": 1.9973411205539693e-06, "loss": 0.5083, "step": 164 }, { "ETA": 5.54, "epoch": 0.05306319343945972, "fp16_scale": 1.0, "global_step": 165, "grad_norm": 2.1338753921220284, "learning_rate": 1.9972646448407852e-06, "loss": 0.469, "step": 165 }, { "ETA": 5.54, "epoch": 0.05338478855121402, "fp16_scale": 1.0, "global_step": 166, "grad_norm": 1.8632227385106377, "learning_rate": 1.9971870863593923e-06, "loss": 0.4324, "step": 166 }, { "ETA": 5.54, "epoch": 0.05370638366296832, "fp16_scale": 1.0, "global_step": 167, "grad_norm": 1.9945406494512294, "learning_rate": 1.9971084451939993e-06, "loss": 0.4701, "step": 167 }, { "ETA": 5.56, "epoch": 0.054027978774722624, "fp16_scale": 1.0, "global_step": 168, "grad_norm": 2.014063734615643, "learning_rate": 1.99702872142999e-06, "loss": 0.5058, "step": 168 }, { "ETA": 5.56, "epoch": 0.054349573886476925, "fp16_scale": 1.0, "global_step": 169, "grad_norm": 1.9732053379353365, "learning_rate": 1.9969479151539234e-06, "loss": 0.4995, "step": 169 }, { "ETA": 5.56, "epoch": 0.054671168998231226, "fp16_scale": 1.0, "global_step": 170, "grad_norm": 1.7441435113487023, "learning_rate": 1.9968660264535337e-06, "loss": 0.4408, "step": 170 }, { "ETA": 5.56, "epoch": 0.05499276410998553, "fp16_scale": 1.0, "global_step": 171, "grad_norm": 2.252759572663441, "learning_rate": 1.996783055417731e-06, "loss": 0.4896, "step": 171 }, { "ETA": 5.56, "epoch": 0.05531435922173983, "fp16_scale": 1.0, "global_step": 172, "grad_norm": 1.8886045826013742, "learning_rate": 1.9966990021365996e-06, "loss": 0.4924, "step": 172 }, { "ETA": 5.56, "epoch": 0.05563595433349413, "fp16_scale": 1.0, "global_step": 173, "grad_norm": 1.9236696733010126, "learning_rate": 1.9966138667014e-06, "loss": 0.4812, "step": 173 }, { "ETA": 5.56, "epoch": 0.05595754944524843, "fp16_scale": 1.0, "global_step": 174, "grad_norm": 1.8929409263112333, "learning_rate": 1.9965276492045658e-06, "loss": 0.4767, "step": 174 }, { "ETA": 5.56, "epoch": 0.05627914455700273, "fp16_scale": 1.0, "global_step": 175, "grad_norm": 2.138949107889882, "learning_rate": 1.996440349739708e-06, "loss": 0.5348, "step": 175 }, { "ETA": 5.56, "epoch": 0.05660073966875703, "fp16_scale": 1.0, "global_step": 176, "grad_norm": 2.0879534448263315, "learning_rate": 1.9963519684016104e-06, "loss": 0.5359, "step": 176 }, { "ETA": 5.54, "epoch": 0.056922334780511334, "fp16_scale": 1.0, "global_step": 177, "grad_norm": 2.1666246712030013, "learning_rate": 1.996262505286232e-06, "loss": 0.4162, "step": 177 }, { "ETA": 5.55, "epoch": 0.057243929892265635, "fp16_scale": 1.0, "global_step": 178, "grad_norm": 1.7446995140521049, "learning_rate": 1.9961719604907065e-06, "loss": 0.4333, "step": 178 }, { "ETA": 5.54, "epoch": 0.057565525004019936, "fp16_scale": 1.0, "global_step": 179, "grad_norm": 2.243637912780736, "learning_rate": 1.996080334113341e-06, "loss": 0.4457, "step": 179 }, { "ETA": 5.53, "epoch": 0.05788712011577424, "fp16_scale": 1.0, "global_step": 180, "grad_norm": 1.9630983410591967, "learning_rate": 1.995987626253619e-06, "loss": 0.4734, "step": 180 }, { "ETA": 5.53, "epoch": 0.05820871522752854, "fp16_scale": 1.0, "global_step": 181, "grad_norm": 2.0683050944198205, "learning_rate": 1.995893837012196e-06, "loss": 0.4875, "step": 181 }, { "ETA": 5.53, "epoch": 0.05853031033928284, "fp16_scale": 1.0, "global_step": 182, "grad_norm": 1.9810359539411369, "learning_rate": 1.9957989664909025e-06, "loss": 0.5567, "step": 182 }, { "ETA": 5.53, "epoch": 0.05885190545103714, "fp16_scale": 1.0, "global_step": 183, "grad_norm": 1.9715690339103922, "learning_rate": 1.995703014792744e-06, "loss": 0.4645, "step": 183 }, { "ETA": 5.52, "epoch": 0.05917350056279144, "fp16_scale": 1.0, "global_step": 184, "grad_norm": 2.27653607742063, "learning_rate": 1.995605982021898e-06, "loss": 0.4036, "step": 184 }, { "ETA": 5.52, "epoch": 0.059495095674545743, "fp16_scale": 1.0, "global_step": 185, "grad_norm": 2.1737370192078567, "learning_rate": 1.995507868283717e-06, "loss": 0.5222, "step": 185 }, { "ETA": 5.52, "epoch": 0.05981669078630005, "fp16_scale": 1.0, "global_step": 186, "grad_norm": 2.218311186174217, "learning_rate": 1.995408673684727e-06, "loss": 0.4512, "step": 186 }, { "ETA": 5.52, "epoch": 0.06013828589805435, "fp16_scale": 1.0, "global_step": 187, "grad_norm": 2.0221690338291576, "learning_rate": 1.995308398332627e-06, "loss": 0.5215, "step": 187 }, { "ETA": 5.52, "epoch": 0.060459881009808654, "fp16_scale": 1.0, "global_step": 188, "grad_norm": 2.09772793745461, "learning_rate": 1.99520704233629e-06, "loss": 0.5167, "step": 188 }, { "ETA": 5.5, "epoch": 0.060781476121562955, "fp16_scale": 1.0, "global_step": 189, "grad_norm": 2.149692926673016, "learning_rate": 1.995104605805762e-06, "loss": 0.4484, "step": 189 }, { "ETA": 5.5, "epoch": 0.061103071233317256, "fp16_scale": 1.0, "global_step": 190, "grad_norm": 1.871556062942563, "learning_rate": 1.995001088852262e-06, "loss": 0.475, "step": 190 }, { "ETA": 5.5, "epoch": 0.06142466634507156, "fp16_scale": 1.0, "global_step": 191, "grad_norm": 2.063540687892712, "learning_rate": 1.9948964915881833e-06, "loss": 0.4103, "step": 191 }, { "ETA": 5.5, "epoch": 0.06174626145682586, "fp16_scale": 1.0, "global_step": 192, "grad_norm": 2.0723345018771515, "learning_rate": 1.9947908141270895e-06, "loss": 0.4584, "step": 192 }, { "ETA": 5.5, "epoch": 0.06206785656858016, "fp16_scale": 1.0, "global_step": 193, "grad_norm": 2.1660972095275164, "learning_rate": 1.9946840565837204e-06, "loss": 0.5952, "step": 193 }, { "ETA": 5.51, "epoch": 0.06238945168033446, "fp16_scale": 1.0, "global_step": 194, "grad_norm": 2.0273096299920965, "learning_rate": 1.994576219073985e-06, "loss": 0.5321, "step": 194 }, { "ETA": 5.51, "epoch": 0.06271104679208876, "fp16_scale": 1.0, "global_step": 195, "grad_norm": 2.141105415839073, "learning_rate": 1.994467301714968e-06, "loss": 0.4956, "step": 195 }, { "ETA": 5.51, "epoch": 0.06303264190384306, "fp16_scale": 1.0, "global_step": 196, "grad_norm": 1.8902357894072024, "learning_rate": 1.9943573046249244e-06, "loss": 0.5354, "step": 196 }, { "ETA": 5.51, "epoch": 0.06335423701559736, "fp16_scale": 1.0, "global_step": 197, "grad_norm": 1.8151811916682024, "learning_rate": 1.9942462279232824e-06, "loss": 0.4465, "step": 197 }, { "ETA": 5.51, "epoch": 0.06367583212735166, "fp16_scale": 1.0, "global_step": 198, "grad_norm": 1.9991294470323713, "learning_rate": 1.9941340717306423e-06, "loss": 0.4476, "step": 198 }, { "ETA": 5.51, "epoch": 0.06399742723910597, "fp16_scale": 1.0, "global_step": 199, "grad_norm": 1.8900639064764466, "learning_rate": 1.9940208361687756e-06, "loss": 0.4736, "step": 199 }, { "ETA": 5.51, "epoch": 0.06431902235086026, "fp16_scale": 1.0, "global_step": 200, "grad_norm": 2.0981671204932897, "learning_rate": 1.993906521360628e-06, "loss": 0.5347, "step": 200 }, { "ETA": 5.64, "epoch": 0.06464061746261457, "fp16_scale": 1.0, "global_step": 201, "grad_norm": 2.1689594357999513, "learning_rate": 1.9937911274303143e-06, "loss": 0.5682, "step": 201 }, { "ETA": 5.64, "epoch": 0.06496221257436886, "fp16_scale": 1.0, "global_step": 202, "grad_norm": 2.05451397459771, "learning_rate": 1.993674654503122e-06, "loss": 0.4233, "step": 202 }, { "ETA": 5.64, "epoch": 0.06528380768612317, "fp16_scale": 1.0, "global_step": 203, "grad_norm": 2.1267490771823003, "learning_rate": 1.993557102705511e-06, "loss": 0.4746, "step": 203 }, { "ETA": 5.64, "epoch": 0.06560540279787747, "fp16_scale": 1.0, "global_step": 204, "grad_norm": 2.0247502131741846, "learning_rate": 1.9934384721651113e-06, "loss": 0.4593, "step": 204 }, { "ETA": 5.62, "epoch": 0.06592699790963177, "fp16_scale": 1.0, "global_step": 205, "grad_norm": 2.3792680496159235, "learning_rate": 1.9933187630107243e-06, "loss": 0.4401, "step": 205 }, { "ETA": 5.62, "epoch": 0.06624859302138608, "fp16_scale": 1.0, "global_step": 206, "grad_norm": 1.8766085210911245, "learning_rate": 1.9931979753723233e-06, "loss": 0.5038, "step": 206 }, { "ETA": 5.63, "epoch": 0.06657018813314038, "fp16_scale": 1.0, "global_step": 207, "grad_norm": 2.13191856771071, "learning_rate": 1.993076109381052e-06, "loss": 0.4498, "step": 207 }, { "ETA": 5.63, "epoch": 0.06689178324489468, "fp16_scale": 1.0, "global_step": 208, "grad_norm": 2.7407131077048708, "learning_rate": 1.9929531651692245e-06, "loss": 0.4447, "step": 208 }, { "ETA": 5.63, "epoch": 0.06721337835664898, "fp16_scale": 1.0, "global_step": 209, "grad_norm": 2.1340982723217263, "learning_rate": 1.992829142870326e-06, "loss": 0.4677, "step": 209 }, { "ETA": 5.63, "epoch": 0.06753497346840329, "fp16_scale": 1.0, "global_step": 210, "grad_norm": 2.0796370416466363, "learning_rate": 1.992704042619013e-06, "loss": 0.4343, "step": 210 }, { "ETA": 5.62, "epoch": 0.06785656858015758, "fp16_scale": 1.0, "global_step": 211, "grad_norm": 2.0605035706073087, "learning_rate": 1.992577864551111e-06, "loss": 0.4034, "step": 211 }, { "ETA": 5.63, "epoch": 0.06817816369191189, "fp16_scale": 1.0, "global_step": 212, "grad_norm": 1.980858167776178, "learning_rate": 1.9924506088036165e-06, "loss": 0.4863, "step": 212 }, { "ETA": 5.62, "epoch": 0.06849975880366618, "fp16_scale": 1.0, "global_step": 213, "grad_norm": 2.114940893621464, "learning_rate": 1.9923222755146957e-06, "loss": 0.4586, "step": 213 }, { "ETA": 5.61, "epoch": 0.06882135391542049, "fp16_scale": 1.0, "global_step": 214, "grad_norm": 2.1489712242568233, "learning_rate": 1.992192864823685e-06, "loss": 0.3696, "step": 214 }, { "ETA": 5.61, "epoch": 0.06914294902717479, "fp16_scale": 1.0, "global_step": 215, "grad_norm": 2.28734431293892, "learning_rate": 1.992062376871091e-06, "loss": 0.5426, "step": 215 }, { "ETA": 5.61, "epoch": 0.0694645441389291, "fp16_scale": 1.0, "global_step": 216, "grad_norm": 1.961631918540186, "learning_rate": 1.991930811798589e-06, "loss": 0.3718, "step": 216 }, { "ETA": 5.61, "epoch": 0.06978613925068339, "fp16_scale": 1.0, "global_step": 217, "grad_norm": 2.201954772079071, "learning_rate": 1.991798169749024e-06, "loss": 0.4645, "step": 217 }, { "ETA": 5.61, "epoch": 0.0701077343624377, "fp16_scale": 1.0, "global_step": 218, "grad_norm": 1.683968639976631, "learning_rate": 1.9916644508664113e-06, "loss": 0.4626, "step": 218 }, { "ETA": 5.61, "epoch": 0.07042932947419199, "fp16_scale": 1.0, "global_step": 219, "grad_norm": 1.8954887135461083, "learning_rate": 1.991529655295934e-06, "loss": 0.4655, "step": 219 }, { "ETA": 5.61, "epoch": 0.0707509245859463, "fp16_scale": 1.0, "global_step": 220, "grad_norm": 1.9778515969605384, "learning_rate": 1.9913937831839447e-06, "loss": 0.4583, "step": 220 }, { "ETA": 5.59, "epoch": 0.07107251969770059, "fp16_scale": 1.0, "global_step": 221, "grad_norm": 2.263287480391095, "learning_rate": 1.991256834677965e-06, "loss": 0.4433, "step": 221 }, { "ETA": 5.59, "epoch": 0.0713941148094549, "fp16_scale": 1.0, "global_step": 222, "grad_norm": 2.3745252441991695, "learning_rate": 1.991118809926685e-06, "loss": 0.4594, "step": 222 }, { "ETA": 5.59, "epoch": 0.0717157099212092, "fp16_scale": 1.0, "global_step": 223, "grad_norm": 1.794064858274459, "learning_rate": 1.990979709079964e-06, "loss": 0.4435, "step": 223 }, { "ETA": 5.59, "epoch": 0.0720373050329635, "fp16_scale": 1.0, "global_step": 224, "grad_norm": 2.1273526787298747, "learning_rate": 1.9908395322888292e-06, "loss": 0.4445, "step": 224 }, { "ETA": 5.59, "epoch": 0.0723589001447178, "fp16_scale": 1.0, "global_step": 225, "grad_norm": 1.9294042532521454, "learning_rate": 1.9906982797054747e-06, "loss": 0.4026, "step": 225 }, { "ETA": 5.59, "epoch": 0.0726804952564721, "fp16_scale": 1.0, "global_step": 226, "grad_norm": 2.0009389469804906, "learning_rate": 1.990555951483265e-06, "loss": 0.4871, "step": 226 }, { "ETA": 5.59, "epoch": 0.0730020903682264, "fp16_scale": 1.0, "global_step": 227, "grad_norm": 1.9451852255069821, "learning_rate": 1.99041254777673e-06, "loss": 0.4363, "step": 227 }, { "ETA": 5.59, "epoch": 0.07332368547998071, "fp16_scale": 1.0, "global_step": 228, "grad_norm": 1.9439742030991134, "learning_rate": 1.99026806874157e-06, "loss": 0.3954, "step": 228 }, { "ETA": 5.57, "epoch": 0.073645280591735, "fp16_scale": 1.0, "global_step": 229, "grad_norm": 2.0564657113810303, "learning_rate": 1.990122514534651e-06, "loss": 0.4065, "step": 229 }, { "ETA": 5.56, "epoch": 0.07396687570348931, "fp16_scale": 1.0, "global_step": 230, "grad_norm": 2.0852125947795663, "learning_rate": 1.9899758853140062e-06, "loss": 0.3481, "step": 230 }, { "ETA": 5.54, "epoch": 0.0742884708152436, "fp16_scale": 1.0, "global_step": 231, "grad_norm": 2.0552294493253815, "learning_rate": 1.9898281812388367e-06, "loss": 0.3555, "step": 231 }, { "ETA": 5.54, "epoch": 0.07461006592699791, "fp16_scale": 1.0, "global_step": 232, "grad_norm": 1.9173492037411566, "learning_rate": 1.9896794024695106e-06, "loss": 0.441, "step": 232 }, { "ETA": 5.53, "epoch": 0.0749316610387522, "fp16_scale": 1.0, "global_step": 233, "grad_norm": 2.206279686080648, "learning_rate": 1.9895295491675627e-06, "loss": 0.4212, "step": 233 }, { "ETA": 5.53, "epoch": 0.07525325615050651, "fp16_scale": 1.0, "global_step": 234, "grad_norm": 2.1281257459738536, "learning_rate": 1.989378621495694e-06, "loss": 0.4442, "step": 234 }, { "ETA": 5.53, "epoch": 0.07557485126226081, "fp16_scale": 1.0, "global_step": 235, "grad_norm": 2.1215344537423224, "learning_rate": 1.9892266196177734e-06, "loss": 0.4542, "step": 235 }, { "ETA": 5.52, "epoch": 0.07589644637401512, "fp16_scale": 1.0, "global_step": 236, "grad_norm": 2.090130869035227, "learning_rate": 1.9890735436988344e-06, "loss": 0.5499, "step": 236 }, { "ETA": 5.52, "epoch": 0.07621804148576941, "fp16_scale": 1.0, "global_step": 237, "grad_norm": 1.937700495165257, "learning_rate": 1.9889193939050776e-06, "loss": 0.438, "step": 237 }, { "ETA": 5.52, "epoch": 0.07653963659752372, "fp16_scale": 1.0, "global_step": 238, "grad_norm": 2.0169598442542207, "learning_rate": 1.988764170403869e-06, "loss": 0.3989, "step": 238 }, { "ETA": 5.52, "epoch": 0.07686123170927801, "fp16_scale": 1.0, "global_step": 239, "grad_norm": 1.9172068608330903, "learning_rate": 1.9886078733637405e-06, "loss": 0.4921, "step": 239 }, { "ETA": 5.52, "epoch": 0.07718282682103232, "fp16_scale": 1.0, "global_step": 240, "grad_norm": 2.0508531717431784, "learning_rate": 1.9884505029543905e-06, "loss": 0.4666, "step": 240 }, { "ETA": 5.52, "epoch": 0.07750442193278662, "fp16_scale": 1.0, "global_step": 241, "grad_norm": 1.8813618835383783, "learning_rate": 1.9882920593466815e-06, "loss": 0.4799, "step": 241 }, { "ETA": 5.51, "epoch": 0.07782601704454092, "fp16_scale": 1.0, "global_step": 242, "grad_norm": 2.0156583473726357, "learning_rate": 1.988132542712642e-06, "loss": 0.4824, "step": 242 }, { "ETA": 5.52, "epoch": 0.07814761215629522, "fp16_scale": 1.0, "global_step": 243, "grad_norm": 1.8786998169485898, "learning_rate": 1.9879719532254654e-06, "loss": 0.4526, "step": 243 }, { "ETA": 5.52, "epoch": 0.07846920726804953, "fp16_scale": 1.0, "global_step": 244, "grad_norm": 1.9040461898330572, "learning_rate": 1.9878102910595096e-06, "loss": 0.4329, "step": 244 }, { "ETA": 5.52, "epoch": 0.07879080237980382, "fp16_scale": 1.0, "global_step": 245, "grad_norm": 2.178599517193598, "learning_rate": 1.9876475563902967e-06, "loss": 0.448, "step": 245 }, { "ETA": 5.52, "epoch": 0.07911239749155813, "fp16_scale": 1.0, "global_step": 246, "grad_norm": 2.242242117764703, "learning_rate": 1.987483749394515e-06, "loss": 0.4983, "step": 246 }, { "ETA": 5.5, "epoch": 0.07943399260331242, "fp16_scale": 1.0, "global_step": 247, "grad_norm": 2.5317565590513804, "learning_rate": 1.9873188702500162e-06, "loss": 0.4893, "step": 247 }, { "ETA": 5.49, "epoch": 0.07975558771506673, "fp16_scale": 1.0, "global_step": 248, "grad_norm": 2.307404938147276, "learning_rate": 1.9871529191358147e-06, "loss": 0.4137, "step": 248 }, { "ETA": 5.49, "epoch": 0.08007718282682104, "fp16_scale": 1.0, "global_step": 249, "grad_norm": 2.0851611752705255, "learning_rate": 1.9869858962320907e-06, "loss": 0.5131, "step": 249 }, { "ETA": 5.49, "epoch": 0.08039877793857533, "fp16_scale": 1.0, "global_step": 250, "grad_norm": 1.855787380898543, "learning_rate": 1.986817801720187e-06, "loss": 0.5206, "step": 250 }, { "ETA": 5.48, "epoch": 0.08072037305032964, "fp16_scale": 1.0, "global_step": 251, "grad_norm": 1.9574119303708928, "learning_rate": 1.9866486357826107e-06, "loss": 0.4617, "step": 251 }, { "ETA": 5.49, "epoch": 0.08104196816208394, "fp16_scale": 1.0, "global_step": 252, "grad_norm": 2.0464551158026674, "learning_rate": 1.9864783986030313e-06, "loss": 0.4136, "step": 252 }, { "ETA": 5.49, "epoch": 0.08136356327383824, "fp16_scale": 1.0, "global_step": 253, "grad_norm": 1.9828752781504224, "learning_rate": 1.9863070903662816e-06, "loss": 0.4896, "step": 253 }, { "ETA": 5.48, "epoch": 0.08168515838559254, "fp16_scale": 1.0, "global_step": 254, "grad_norm": 2.111681055324118, "learning_rate": 1.986134711258358e-06, "loss": 0.4898, "step": 254 }, { "ETA": 5.47, "epoch": 0.08200675349734685, "fp16_scale": 1.0, "global_step": 255, "grad_norm": 2.152892422133239, "learning_rate": 1.9859612614664184e-06, "loss": 0.3783, "step": 255 }, { "ETA": 5.47, "epoch": 0.08232834860910114, "fp16_scale": 1.0, "global_step": 256, "grad_norm": 2.1160619106041128, "learning_rate": 1.9857867411787847e-06, "loss": 0.4835, "step": 256 }, { "ETA": 5.47, "epoch": 0.08264994372085545, "fp16_scale": 1.0, "global_step": 257, "grad_norm": 2.0588220447500998, "learning_rate": 1.9856111505849395e-06, "loss": 0.5168, "step": 257 }, { "ETA": 5.47, "epoch": 0.08297153883260974, "fp16_scale": 1.0, "global_step": 258, "grad_norm": 2.155900845405635, "learning_rate": 1.9854344898755286e-06, "loss": 0.474, "step": 258 }, { "ETA": 5.46, "epoch": 0.08329313394436405, "fp16_scale": 1.0, "global_step": 259, "grad_norm": 2.4831085607503414, "learning_rate": 1.985256759242359e-06, "loss": 0.44, "step": 259 }, { "ETA": 5.46, "epoch": 0.08361472905611834, "fp16_scale": 1.0, "global_step": 260, "grad_norm": 2.1759924833508464, "learning_rate": 1.9850779588783996e-06, "loss": 0.47, "step": 260 }, { "ETA": 5.45, "epoch": 0.08393632416787265, "fp16_scale": 1.0, "global_step": 261, "grad_norm": 2.2814754125500345, "learning_rate": 1.9848980889777815e-06, "loss": 0.4207, "step": 261 }, { "ETA": 5.45, "epoch": 0.08425791927962695, "fp16_scale": 1.0, "global_step": 262, "grad_norm": 2.0695546230875115, "learning_rate": 1.984717149735795e-06, "loss": 0.5273, "step": 262 }, { "ETA": 5.45, "epoch": 0.08457951439138126, "fp16_scale": 1.0, "global_step": 263, "grad_norm": 2.011815287590636, "learning_rate": 1.984535141348894e-06, "loss": 0.4799, "step": 263 }, { "ETA": 5.45, "epoch": 0.08490110950313555, "fp16_scale": 1.0, "global_step": 264, "grad_norm": 1.7799971082339463, "learning_rate": 1.9843520640146907e-06, "loss": 0.5345, "step": 264 }, { "ETA": 5.45, "epoch": 0.08522270461488986, "fp16_scale": 1.0, "global_step": 265, "grad_norm": 2.022283024501158, "learning_rate": 1.9841679179319603e-06, "loss": 0.4232, "step": 265 }, { "ETA": 5.44, "epoch": 0.08554429972664415, "fp16_scale": 1.0, "global_step": 266, "grad_norm": 2.1994235877050237, "learning_rate": 1.983982703300637e-06, "loss": 0.4251, "step": 266 }, { "ETA": 5.44, "epoch": 0.08586589483839846, "fp16_scale": 1.0, "global_step": 267, "grad_norm": 1.7288150832221585, "learning_rate": 1.9837964203218146e-06, "loss": 0.4375, "step": 267 }, { "ETA": 5.44, "epoch": 0.08618748995015275, "fp16_scale": 1.0, "global_step": 268, "grad_norm": 1.8733532543135558, "learning_rate": 1.9836090691977484e-06, "loss": 0.452, "step": 268 }, { "ETA": 5.44, "epoch": 0.08650908506190706, "fp16_scale": 1.0, "global_step": 269, "grad_norm": 1.9609845801107526, "learning_rate": 1.983420650131852e-06, "loss": 0.4456, "step": 269 }, { "ETA": 5.44, "epoch": 0.08683068017366136, "fp16_scale": 1.0, "global_step": 270, "grad_norm": 2.0104836911219515, "learning_rate": 1.9832311633287e-06, "loss": 0.4146, "step": 270 }, { "ETA": 5.44, "epoch": 0.08715227528541566, "fp16_scale": 1.0, "global_step": 271, "grad_norm": 1.9748172750720034, "learning_rate": 1.9830406089940248e-06, "loss": 0.516, "step": 271 }, { "ETA": 5.44, "epoch": 0.08747387039716996, "fp16_scale": 1.0, "global_step": 272, "grad_norm": 1.7308986178352186, "learning_rate": 1.982848987334719e-06, "loss": 0.4786, "step": 272 }, { "ETA": 5.43, "epoch": 0.08779546550892427, "fp16_scale": 1.0, "global_step": 273, "grad_norm": 2.407921562331774, "learning_rate": 1.9826562985588327e-06, "loss": 0.5075, "step": 273 }, { "ETA": 5.42, "epoch": 0.08811706062067856, "fp16_scale": 1.0, "global_step": 274, "grad_norm": 2.4177834248326686, "learning_rate": 1.9824625428755758e-06, "loss": 0.3536, "step": 274 }, { "ETA": 5.42, "epoch": 0.08843865573243287, "fp16_scale": 1.0, "global_step": 275, "grad_norm": 1.9921521614436994, "learning_rate": 1.9822677204953168e-06, "loss": 0.4529, "step": 275 }, { "ETA": 5.41, "epoch": 0.08876025084418716, "fp16_scale": 1.0, "global_step": 276, "grad_norm": 2.1416846030041032, "learning_rate": 1.9820718316295814e-06, "loss": 0.4128, "step": 276 }, { "ETA": 5.41, "epoch": 0.08908184595594147, "fp16_scale": 1.0, "global_step": 277, "grad_norm": 1.96303970778231, "learning_rate": 1.9818748764910537e-06, "loss": 0.4963, "step": 277 }, { "ETA": 5.41, "epoch": 0.08940344106769577, "fp16_scale": 1.0, "global_step": 278, "grad_norm": 2.16885895447799, "learning_rate": 1.981676855293575e-06, "loss": 0.4541, "step": 278 }, { "ETA": 5.41, "epoch": 0.08972503617945007, "fp16_scale": 1.0, "global_step": 279, "grad_norm": 2.0061306880759386, "learning_rate": 1.9814777682521446e-06, "loss": 0.5096, "step": 279 }, { "ETA": 5.41, "epoch": 0.09004663129120437, "fp16_scale": 1.0, "global_step": 280, "grad_norm": 2.0713104021770787, "learning_rate": 1.981277615582919e-06, "loss": 0.479, "step": 280 }, { "ETA": 5.41, "epoch": 0.09036822640295868, "fp16_scale": 1.0, "global_step": 281, "grad_norm": 1.9008364956507453, "learning_rate": 1.9810763975032115e-06, "loss": 0.4401, "step": 281 }, { "ETA": 5.41, "epoch": 0.09068982151471297, "fp16_scale": 1.0, "global_step": 282, "grad_norm": 1.9828121117507633, "learning_rate": 1.9808741142314927e-06, "loss": 0.4563, "step": 282 }, { "ETA": 5.41, "epoch": 0.09101141662646728, "fp16_scale": 1.0, "global_step": 283, "grad_norm": 1.8067168743021271, "learning_rate": 1.9806707659873885e-06, "loss": 0.5321, "step": 283 }, { "ETA": 5.41, "epoch": 0.09133301173822157, "fp16_scale": 1.0, "global_step": 284, "grad_norm": 1.8139567240837908, "learning_rate": 1.9804663529916823e-06, "loss": 0.396, "step": 284 }, { "ETA": 5.41, "epoch": 0.09165460684997588, "fp16_scale": 1.0, "global_step": 285, "grad_norm": 1.9160801554334261, "learning_rate": 1.980260875466313e-06, "loss": 0.4233, "step": 285 }, { "ETA": 5.41, "epoch": 0.09197620196173018, "fp16_scale": 1.0, "global_step": 286, "grad_norm": 2.152353481726994, "learning_rate": 1.9800543336343757e-06, "loss": 0.551, "step": 286 }, { "ETA": 5.4, "epoch": 0.09229779707348448, "fp16_scale": 1.0, "global_step": 287, "grad_norm": 2.3467354491283334, "learning_rate": 1.9798467277201197e-06, "loss": 0.3933, "step": 287 }, { "ETA": 5.4, "epoch": 0.09261939218523878, "fp16_scale": 1.0, "global_step": 288, "grad_norm": 1.9965345728302655, "learning_rate": 1.9796380579489517e-06, "loss": 0.5057, "step": 288 }, { "ETA": 5.4, "epoch": 0.09294098729699309, "fp16_scale": 1.0, "global_step": 289, "grad_norm": 2.226085466112576, "learning_rate": 1.9794283245474318e-06, "loss": 0.4997, "step": 289 }, { "ETA": 5.4, "epoch": 0.0932625824087474, "fp16_scale": 1.0, "global_step": 290, "grad_norm": 2.3422961278712022, "learning_rate": 1.9792175277432763e-06, "loss": 0.4265, "step": 290 }, { "ETA": 5.4, "epoch": 0.09358417752050169, "fp16_scale": 1.0, "global_step": 291, "grad_norm": 2.250545144667838, "learning_rate": 1.9790056677653543e-06, "loss": 0.4607, "step": 291 }, { "ETA": 5.4, "epoch": 0.093905772632256, "fp16_scale": 1.0, "global_step": 292, "grad_norm": 2.107671337765196, "learning_rate": 1.978792744843691e-06, "loss": 0.4609, "step": 292 }, { "ETA": 5.4, "epoch": 0.09422736774401029, "fp16_scale": 1.0, "global_step": 293, "grad_norm": 2.3046588962914516, "learning_rate": 1.9785787592094646e-06, "loss": 0.49, "step": 293 }, { "ETA": 5.4, "epoch": 0.0945489628557646, "fp16_scale": 1.0, "global_step": 294, "grad_norm": 2.121159234142659, "learning_rate": 1.978363711095007e-06, "loss": 0.4361, "step": 294 }, { "ETA": 5.4, "epoch": 0.09487055796751889, "fp16_scale": 1.0, "global_step": 295, "grad_norm": 2.002846941930535, "learning_rate": 1.9781476007338054e-06, "loss": 0.432, "step": 295 }, { "ETA": 5.4, "epoch": 0.0951921530792732, "fp16_scale": 1.0, "global_step": 296, "grad_norm": 1.8538864150738767, "learning_rate": 1.9779304283604985e-06, "loss": 0.4812, "step": 296 }, { "ETA": 5.39, "epoch": 0.0955137481910275, "fp16_scale": 1.0, "global_step": 297, "grad_norm": 2.571711408648999, "learning_rate": 1.977712194210878e-06, "loss": 0.4292, "step": 297 }, { "ETA": 5.39, "epoch": 0.0958353433027818, "fp16_scale": 1.0, "global_step": 298, "grad_norm": 1.9498087105851019, "learning_rate": 1.977492898521889e-06, "loss": 0.5071, "step": 298 }, { "ETA": 5.39, "epoch": 0.0961569384145361, "fp16_scale": 1.0, "global_step": 299, "grad_norm": 2.0510326617660404, "learning_rate": 1.9772725415316304e-06, "loss": 0.4764, "step": 299 }, { "ETA": 5.39, "epoch": 0.0964785335262904, "fp16_scale": 1.0, "global_step": 300, "grad_norm": 1.9448634485999639, "learning_rate": 1.977051123479351e-06, "loss": 0.4956, "step": 300 }, { "ETA": 5.39, "epoch": 0.0968001286380447, "fp16_scale": 1.0, "global_step": 301, "grad_norm": 2.178769414009859, "learning_rate": 1.9768286446054532e-06, "loss": 0.4224, "step": 301 }, { "ETA": 5.39, "epoch": 0.09712172374979901, "fp16_scale": 1.0, "global_step": 302, "grad_norm": 2.2638075232189108, "learning_rate": 1.976605105151491e-06, "loss": 0.4662, "step": 302 }, { "ETA": 5.38, "epoch": 0.0974433188615533, "fp16_scale": 1.0, "global_step": 303, "grad_norm": 2.434637731614345, "learning_rate": 1.9763805053601696e-06, "loss": 0.4414, "step": 303 }, { "ETA": 5.38, "epoch": 0.09776491397330761, "fp16_scale": 1.0, "global_step": 304, "grad_norm": 2.1063739090403812, "learning_rate": 1.976154845475345e-06, "loss": 0.4902, "step": 304 }, { "ETA": 5.38, "epoch": 0.0980865090850619, "fp16_scale": 1.0, "global_step": 305, "grad_norm": 2.2403744821922458, "learning_rate": 1.9759281257420257e-06, "loss": 0.5037, "step": 305 }, { "ETA": 5.38, "epoch": 0.09840810419681621, "fp16_scale": 1.0, "global_step": 306, "grad_norm": 1.9278762220091021, "learning_rate": 1.9757003464063693e-06, "loss": 0.4761, "step": 306 }, { "ETA": 5.38, "epoch": 0.0987296993085705, "fp16_scale": 1.0, "global_step": 307, "grad_norm": 1.9065898198947517, "learning_rate": 1.975471507715685e-06, "loss": 0.4683, "step": 307 }, { "ETA": 5.38, "epoch": 0.09905129442032481, "fp16_scale": 1.0, "global_step": 308, "grad_norm": 1.9777547584058885, "learning_rate": 1.9752416099184304e-06, "loss": 0.4321, "step": 308 }, { "ETA": 5.38, "epoch": 0.09937288953207911, "fp16_scale": 1.0, "global_step": 309, "grad_norm": 1.963299362703367, "learning_rate": 1.9750106532642156e-06, "loss": 0.5347, "step": 309 }, { "ETA": 5.37, "epoch": 0.09969448464383342, "fp16_scale": 1.0, "global_step": 310, "grad_norm": 2.2616629673340327, "learning_rate": 1.974778638003799e-06, "loss": 0.4078, "step": 310 }, { "ETA": 5.37, "epoch": 0.10001607975558771, "fp16_scale": 1.0, "global_step": 311, "grad_norm": 1.8888751677487754, "learning_rate": 1.974545564389088e-06, "loss": 0.4648, "step": 311 }, { "ETA": 5.37, "epoch": 0.10033767486734202, "fp16_scale": 1.0, "global_step": 312, "grad_norm": 2.090204859896294, "learning_rate": 1.974311432673139e-06, "loss": 0.4834, "step": 312 }, { "ETA": 5.36, "epoch": 0.10065926997909631, "fp16_scale": 1.0, "global_step": 313, "grad_norm": 1.9850263954290188, "learning_rate": 1.974076243110159e-06, "loss": 0.4134, "step": 313 }, { "ETA": 5.36, "epoch": 0.10098086509085062, "fp16_scale": 1.0, "global_step": 314, "grad_norm": 1.8760687192321301, "learning_rate": 1.973839995955501e-06, "loss": 0.4857, "step": 314 }, { "ETA": 5.36, "epoch": 0.10130246020260492, "fp16_scale": 1.0, "global_step": 315, "grad_norm": 1.8290568280756117, "learning_rate": 1.9736026914656684e-06, "loss": 0.4375, "step": 315 }, { "ETA": 5.36, "epoch": 0.10162405531435922, "fp16_scale": 1.0, "global_step": 316, "grad_norm": 2.2258972292420514, "learning_rate": 1.973364329898311e-06, "loss": 0.5097, "step": 316 }, { "ETA": 5.36, "epoch": 0.10194565042611352, "fp16_scale": 1.0, "global_step": 317, "grad_norm": 1.9283114461835762, "learning_rate": 1.973124911512228e-06, "loss": 0.481, "step": 317 }, { "ETA": 5.35, "epoch": 0.10226724553786783, "fp16_scale": 1.0, "global_step": 318, "grad_norm": 1.8324474077343869, "learning_rate": 1.9728844365673643e-06, "loss": 0.39, "step": 318 }, { "ETA": 5.35, "epoch": 0.10258884064962212, "fp16_scale": 1.0, "global_step": 319, "grad_norm": 2.2401290418034145, "learning_rate": 1.9726429053248126e-06, "loss": 0.446, "step": 319 }, { "ETA": 5.35, "epoch": 0.10291043576137643, "fp16_scale": 1.0, "global_step": 320, "grad_norm": 1.9427407352785937, "learning_rate": 1.9724003180468134e-06, "loss": 0.5237, "step": 320 }, { "ETA": 5.35, "epoch": 0.10323203087313072, "fp16_scale": 1.0, "global_step": 321, "grad_norm": 2.1206260936732795, "learning_rate": 1.972156674996752e-06, "loss": 0.4907, "step": 321 }, { "ETA": 5.35, "epoch": 0.10355362598488503, "fp16_scale": 1.0, "global_step": 322, "grad_norm": 2.1426857430727035, "learning_rate": 1.971911976439162e-06, "loss": 0.4884, "step": 322 }, { "ETA": 5.35, "epoch": 0.10387522109663933, "fp16_scale": 1.0, "global_step": 323, "grad_norm": 1.9358955934852775, "learning_rate": 1.9716662226397206e-06, "loss": 0.4172, "step": 323 }, { "ETA": 5.34, "epoch": 0.10419681620839363, "fp16_scale": 1.0, "global_step": 324, "grad_norm": 1.9755616719498104, "learning_rate": 1.971419413865253e-06, "loss": 0.4955, "step": 324 }, { "ETA": 5.34, "epoch": 0.10451841132014793, "fp16_scale": 1.0, "global_step": 325, "grad_norm": 2.1111800831528167, "learning_rate": 1.9711715503837286e-06, "loss": 0.4747, "step": 325 }, { "ETA": 5.34, "epoch": 0.10484000643190224, "fp16_scale": 1.0, "global_step": 326, "grad_norm": 2.0681166737032473, "learning_rate": 1.9709226324642626e-06, "loss": 0.4525, "step": 326 }, { "ETA": 5.34, "epoch": 0.10516160154365653, "fp16_scale": 1.0, "global_step": 327, "grad_norm": 2.0329374279997983, "learning_rate": 1.970672660377114e-06, "loss": 0.4459, "step": 327 }, { "ETA": 5.33, "epoch": 0.10548319665541084, "fp16_scale": 1.0, "global_step": 328, "grad_norm": 2.074488988732474, "learning_rate": 1.970421634393687e-06, "loss": 0.3724, "step": 328 }, { "ETA": 5.33, "epoch": 0.10580479176716513, "fp16_scale": 1.0, "global_step": 329, "grad_norm": 2.138757297944091, "learning_rate": 1.970169554786531e-06, "loss": 0.4979, "step": 329 }, { "ETA": 5.33, "epoch": 0.10612638687891944, "fp16_scale": 1.0, "global_step": 330, "grad_norm": 1.8627660554651546, "learning_rate": 1.9699164218293377e-06, "loss": 0.4033, "step": 330 }, { "ETA": 5.33, "epoch": 0.10644798199067375, "fp16_scale": 1.0, "global_step": 331, "grad_norm": 2.0531385160925346, "learning_rate": 1.9696622357969435e-06, "loss": 0.4733, "step": 331 }, { "ETA": 5.33, "epoch": 0.10676957710242804, "fp16_scale": 1.0, "global_step": 332, "grad_norm": 1.995724099048772, "learning_rate": 1.9694069969653276e-06, "loss": 0.4718, "step": 332 }, { "ETA": 5.33, "epoch": 0.10709117221418235, "fp16_scale": 1.0, "global_step": 333, "grad_norm": 1.8849998772482415, "learning_rate": 1.9691507056116124e-06, "loss": 0.4744, "step": 333 }, { "ETA": 5.33, "epoch": 0.10741276732593665, "fp16_scale": 1.0, "global_step": 334, "grad_norm": 2.002411293165941, "learning_rate": 1.9688933620140635e-06, "loss": 0.5237, "step": 334 }, { "ETA": 5.33, "epoch": 0.10773436243769095, "fp16_scale": 1.0, "global_step": 335, "grad_norm": 2.398691984124437, "learning_rate": 1.9686349664520887e-06, "loss": 0.4134, "step": 335 }, { "ETA": 5.32, "epoch": 0.10805595754944525, "fp16_scale": 1.0, "global_step": 336, "grad_norm": 1.9498967989275895, "learning_rate": 1.968375519206238e-06, "loss": 0.3973, "step": 336 }, { "ETA": 5.32, "epoch": 0.10837755266119956, "fp16_scale": 1.0, "global_step": 337, "grad_norm": 1.9017393329115277, "learning_rate": 1.9681150205582025e-06, "loss": 0.4283, "step": 337 }, { "ETA": 5.31, "epoch": 0.10869914777295385, "fp16_scale": 1.0, "global_step": 338, "grad_norm": 2.0508551842273097, "learning_rate": 1.967853470790816e-06, "loss": 0.3679, "step": 338 }, { "ETA": 5.31, "epoch": 0.10902074288470816, "fp16_scale": 1.0, "global_step": 339, "grad_norm": 2.1077150608156603, "learning_rate": 1.967590870188053e-06, "loss": 0.4512, "step": 339 }, { "ETA": 5.31, "epoch": 0.10934233799646245, "fp16_scale": 1.0, "global_step": 340, "grad_norm": 1.9681869106158016, "learning_rate": 1.967327219035029e-06, "loss": 0.4334, "step": 340 }, { "ETA": 5.31, "epoch": 0.10966393310821676, "fp16_scale": 1.0, "global_step": 341, "grad_norm": 2.109716319501794, "learning_rate": 1.967062517618e-06, "loss": 0.3853, "step": 341 }, { "ETA": 5.31, "epoch": 0.10998552821997105, "fp16_scale": 1.0, "global_step": 342, "grad_norm": 2.016872674766734, "learning_rate": 1.9667967662243624e-06, "loss": 0.4655, "step": 342 }, { "ETA": 5.3, "epoch": 0.11030712333172536, "fp16_scale": 1.0, "global_step": 343, "grad_norm": 2.108575161654793, "learning_rate": 1.966529965142653e-06, "loss": 0.4595, "step": 343 }, { "ETA": 5.3, "epoch": 0.11062871844347966, "fp16_scale": 1.0, "global_step": 344, "grad_norm": 2.173420307534748, "learning_rate": 1.966262114662547e-06, "loss": 0.4948, "step": 344 }, { "ETA": 5.3, "epoch": 0.11095031355523396, "fp16_scale": 1.0, "global_step": 345, "grad_norm": 2.0730606440639785, "learning_rate": 1.9659932150748607e-06, "loss": 0.4506, "step": 345 }, { "ETA": 5.3, "epoch": 0.11127190866698826, "fp16_scale": 1.0, "global_step": 346, "grad_norm": 1.9958990928537186, "learning_rate": 1.9657232666715485e-06, "loss": 0.5104, "step": 346 }, { "ETA": 5.3, "epoch": 0.11159350377874257, "fp16_scale": 1.0, "global_step": 347, "grad_norm": 1.7452582290531609, "learning_rate": 1.9654522697457033e-06, "loss": 0.4345, "step": 347 }, { "ETA": 5.3, "epoch": 0.11191509889049686, "fp16_scale": 1.0, "global_step": 348, "grad_norm": 1.8489716903183635, "learning_rate": 1.9651802245915573e-06, "loss": 0.4735, "step": 348 }, { "ETA": 5.29, "epoch": 0.11223669400225117, "fp16_scale": 1.0, "global_step": 349, "grad_norm": 2.3048133813373703, "learning_rate": 1.9649071315044794e-06, "loss": 0.4097, "step": 349 }, { "ETA": 5.28, "epoch": 0.11255828911400546, "fp16_scale": 1.0, "global_step": 350, "grad_norm": 2.2961743129791756, "learning_rate": 1.9646329907809786e-06, "loss": 0.4117, "step": 350 }, { "ETA": 5.28, "epoch": 0.11287988422575977, "fp16_scale": 1.0, "global_step": 351, "grad_norm": 1.9315862702027242, "learning_rate": 1.9643578027186984e-06, "loss": 0.4826, "step": 351 }, { "ETA": 5.28, "epoch": 0.11320147933751407, "fp16_scale": 1.0, "global_step": 352, "grad_norm": 1.9851217902296738, "learning_rate": 1.9640815676164216e-06, "loss": 0.5012, "step": 352 }, { "ETA": 5.28, "epoch": 0.11352307444926837, "fp16_scale": 1.0, "global_step": 353, "grad_norm": 2.3317631555471037, "learning_rate": 1.9638042857740673e-06, "loss": 0.4248, "step": 353 }, { "ETA": 5.28, "epoch": 0.11384466956102267, "fp16_scale": 1.0, "global_step": 354, "grad_norm": 1.5533112095795296, "learning_rate": 1.963525957492691e-06, "loss": 0.4486, "step": 354 }, { "ETA": 5.27, "epoch": 0.11416626467277698, "fp16_scale": 1.0, "global_step": 355, "grad_norm": 1.9359397572861832, "learning_rate": 1.9632465830744845e-06, "loss": 0.4699, "step": 355 }, { "ETA": 5.27, "epoch": 0.11448785978453127, "fp16_scale": 1.0, "global_step": 356, "grad_norm": 2.1492595181809464, "learning_rate": 1.9629661628227743e-06, "loss": 0.4772, "step": 356 }, { "ETA": 5.27, "epoch": 0.11480945489628558, "fp16_scale": 1.0, "global_step": 357, "grad_norm": 1.9286738656392266, "learning_rate": 1.9626846970420244e-06, "loss": 0.4863, "step": 357 }, { "ETA": 5.27, "epoch": 0.11513105000803987, "fp16_scale": 1.0, "global_step": 358, "grad_norm": 2.026612586228611, "learning_rate": 1.9624021860378324e-06, "loss": 0.382, "step": 358 }, { "ETA": 5.26, "epoch": 0.11545264511979418, "fp16_scale": 1.0, "global_step": 359, "grad_norm": 2.1481595085938614, "learning_rate": 1.962118630116931e-06, "loss": 0.3635, "step": 359 }, { "ETA": 5.26, "epoch": 0.11577424023154848, "fp16_scale": 1.0, "global_step": 360, "grad_norm": 2.203817729440887, "learning_rate": 1.9618340295871887e-06, "loss": 0.5334, "step": 360 }, { "ETA": 5.26, "epoch": 0.11609583534330278, "fp16_scale": 1.0, "global_step": 361, "grad_norm": 1.852740179654267, "learning_rate": 1.9615483847576057e-06, "loss": 0.4132, "step": 361 }, { "ETA": 5.26, "epoch": 0.11641743045505708, "fp16_scale": 1.0, "global_step": 362, "grad_norm": 2.1161828900485076, "learning_rate": 1.9612616959383188e-06, "loss": 0.4846, "step": 362 }, { "ETA": 5.25, "epoch": 0.11673902556681139, "fp16_scale": 1.0, "global_step": 363, "grad_norm": 2.191037208198011, "learning_rate": 1.960973963440596e-06, "loss": 0.3523, "step": 363 }, { "ETA": 5.25, "epoch": 0.11706062067856568, "fp16_scale": 1.0, "global_step": 364, "grad_norm": 2.063164123006888, "learning_rate": 1.96068518757684e-06, "loss": 0.4727, "step": 364 }, { "ETA": 5.24, "epoch": 0.11738221579031999, "fp16_scale": 1.0, "global_step": 365, "grad_norm": 2.020245619278409, "learning_rate": 1.9603953686605858e-06, "loss": 0.4733, "step": 365 }, { "ETA": 5.23, "epoch": 0.11770381090207428, "fp16_scale": 1.0, "global_step": 366, "grad_norm": 2.4414730636621056, "learning_rate": 1.9601045070065e-06, "loss": 0.4116, "step": 366 }, { "ETA": 5.24, "epoch": 0.11802540601382859, "fp16_scale": 1.0, "global_step": 367, "grad_norm": 1.8085290808011578, "learning_rate": 1.9598126029303836e-06, "loss": 0.473, "step": 367 }, { "ETA": 5.24, "epoch": 0.11834700112558288, "fp16_scale": 1.0, "global_step": 368, "grad_norm": 2.061404295864455, "learning_rate": 1.9595196567491665e-06, "loss": 0.48, "step": 368 }, { "ETA": 5.23, "epoch": 0.11866859623733719, "fp16_scale": 1.0, "global_step": 369, "grad_norm": 1.95202055920713, "learning_rate": 1.9592256687809125e-06, "loss": 0.4003, "step": 369 }, { "ETA": 5.23, "epoch": 0.11899019134909149, "fp16_scale": 1.0, "global_step": 370, "grad_norm": 1.8825953559894752, "learning_rate": 1.958930639344815e-06, "loss": 0.4016, "step": 370 }, { "ETA": 5.23, "epoch": 0.1193117864608458, "fp16_scale": 1.0, "global_step": 371, "grad_norm": 1.983391946392834, "learning_rate": 1.958634568761199e-06, "loss": 0.4306, "step": 371 }, { "ETA": 5.22, "epoch": 0.1196333815726001, "fp16_scale": 1.0, "global_step": 372, "grad_norm": 2.133957388129558, "learning_rate": 1.9583374573515197e-06, "loss": 0.4762, "step": 372 }, { "ETA": 5.22, "epoch": 0.1199549766843544, "fp16_scale": 1.0, "global_step": 373, "grad_norm": 2.150798639136317, "learning_rate": 1.958039305438362e-06, "loss": 0.3935, "step": 373 }, { "ETA": 5.22, "epoch": 0.1202765717961087, "fp16_scale": 1.0, "global_step": 374, "grad_norm": 1.9460695795967955, "learning_rate": 1.957740113345441e-06, "loss": 0.442, "step": 374 }, { "ETA": 5.21, "epoch": 0.120598166907863, "fp16_scale": 1.0, "global_step": 375, "grad_norm": 2.071648691990363, "learning_rate": 1.9574398813976005e-06, "loss": 0.4515, "step": 375 }, { "ETA": 5.22, "epoch": 0.12091976201961731, "fp16_scale": 1.0, "global_step": 376, "grad_norm": 2.3073717685750634, "learning_rate": 1.9571386099208142e-06, "loss": 0.4752, "step": 376 }, { "ETA": 5.21, "epoch": 0.1212413571313716, "fp16_scale": 1.0, "global_step": 377, "grad_norm": 2.323900215946003, "learning_rate": 1.956836299242184e-06, "loss": 0.4397, "step": 377 }, { "ETA": 5.21, "epoch": 0.12156295224312591, "fp16_scale": 1.0, "global_step": 378, "grad_norm": 2.2973185498237587, "learning_rate": 1.9565329496899403e-06, "loss": 0.4131, "step": 378 }, { "ETA": 5.2, "epoch": 0.1218845473548802, "fp16_scale": 1.0, "global_step": 379, "grad_norm": 1.9221938623341537, "learning_rate": 1.9562285615934408e-06, "loss": 0.4085, "step": 379 }, { "ETA": 5.2, "epoch": 0.12220614246663451, "fp16_scale": 1.0, "global_step": 380, "grad_norm": 2.145805547493304, "learning_rate": 1.9559231352831715e-06, "loss": 0.5559, "step": 380 }, { "ETA": 5.2, "epoch": 0.1225277375783888, "fp16_scale": 1.0, "global_step": 381, "grad_norm": 2.004554060401739, "learning_rate": 1.955616671090745e-06, "loss": 0.4517, "step": 381 }, { "ETA": 5.2, "epoch": 0.12284933269014311, "fp16_scale": 1.0, "global_step": 382, "grad_norm": 2.096385241775676, "learning_rate": 1.9553091693489016e-06, "loss": 0.5358, "step": 382 }, { "ETA": 5.2, "epoch": 0.12317092780189741, "fp16_scale": 1.0, "global_step": 383, "grad_norm": 2.1179397128222703, "learning_rate": 1.955000630391508e-06, "loss": 0.469, "step": 383 }, { "ETA": 5.2, "epoch": 0.12349252291365172, "fp16_scale": 1.0, "global_step": 384, "grad_norm": 1.885263273180588, "learning_rate": 1.9546910545535556e-06, "loss": 0.3887, "step": 384 }, { "ETA": 5.2, "epoch": 0.12381411802540601, "fp16_scale": 1.0, "global_step": 385, "grad_norm": 2.205463140577137, "learning_rate": 1.9543804421711636e-06, "loss": 0.5082, "step": 385 }, { "ETA": 5.2, "epoch": 0.12413571313716032, "fp16_scale": 1.0, "global_step": 386, "grad_norm": 2.03347548234032, "learning_rate": 1.954068793581575e-06, "loss": 0.4349, "step": 386 }, { "ETA": 5.19, "epoch": 0.12445730824891461, "fp16_scale": 1.0, "global_step": 387, "grad_norm": 2.078026220588503, "learning_rate": 1.9537561091231596e-06, "loss": 0.4077, "step": 387 }, { "ETA": 5.19, "epoch": 0.12477890336066892, "fp16_scale": 1.0, "global_step": 388, "grad_norm": 1.9983428126578808, "learning_rate": 1.95344238913541e-06, "loss": 0.4936, "step": 388 }, { "ETA": 5.19, "epoch": 0.12510049847242322, "fp16_scale": 1.0, "global_step": 389, "grad_norm": 1.9843240996334781, "learning_rate": 1.953127633958944e-06, "loss": 0.4746, "step": 389 }, { "ETA": 5.19, "epoch": 0.12542209358417752, "fp16_scale": 1.0, "global_step": 390, "grad_norm": 2.0047320593603777, "learning_rate": 1.952811843935503e-06, "loss": 0.494, "step": 390 }, { "ETA": 5.18, "epoch": 0.12574368869593183, "fp16_scale": 1.0, "global_step": 391, "grad_norm": 2.1912314948321923, "learning_rate": 1.9524950194079533e-06, "loss": 0.467, "step": 391 }, { "ETA": 5.18, "epoch": 0.1260652838076861, "fp16_scale": 1.0, "global_step": 392, "grad_norm": 2.011092299877131, "learning_rate": 1.952177160720282e-06, "loss": 0.4499, "step": 392 }, { "ETA": 5.17, "epoch": 0.12638687891944042, "fp16_scale": 1.0, "global_step": 393, "grad_norm": 2.040087836819472, "learning_rate": 1.9518582682176016e-06, "loss": 0.3774, "step": 393 }, { "ETA": 5.17, "epoch": 0.12670847403119473, "fp16_scale": 1.0, "global_step": 394, "grad_norm": 1.9845613783170721, "learning_rate": 1.9515383422461455e-06, "loss": 0.4742, "step": 394 }, { "ETA": 5.17, "epoch": 0.12703006914294904, "fp16_scale": 1.0, "global_step": 395, "grad_norm": 1.7916576850809898, "learning_rate": 1.9512173831532686e-06, "loss": 0.4283, "step": 395 }, { "ETA": 5.17, "epoch": 0.12735166425470332, "fp16_scale": 1.0, "global_step": 396, "grad_norm": 1.8518207434023177, "learning_rate": 1.95089539128745e-06, "loss": 0.398, "step": 396 }, { "ETA": 5.17, "epoch": 0.12767325936645763, "fp16_scale": 1.0, "global_step": 397, "grad_norm": 1.9396466358034898, "learning_rate": 1.950572366998287e-06, "loss": 0.3812, "step": 397 }, { "ETA": 5.17, "epoch": 0.12799485447821193, "fp16_scale": 1.0, "global_step": 398, "grad_norm": 2.1996149256600623, "learning_rate": 1.9502483106365e-06, "loss": 0.4114, "step": 398 }, { "ETA": 5.17, "epoch": 0.12831644958996624, "fp16_scale": 1.0, "global_step": 399, "grad_norm": 2.0924296142233763, "learning_rate": 1.94992322255393e-06, "loss": 0.5468, "step": 399 }, { "ETA": 5.17, "epoch": 0.12863804470172052, "fp16_scale": 1.0, "global_step": 400, "grad_norm": 2.1111375806811656, "learning_rate": 1.9495971031035363e-06, "loss": 0.4113, "step": 400 }, { "ETA": 5.22, "epoch": 0.12895963981347483, "fp16_scale": 1.0, "global_step": 401, "grad_norm": 2.0505877642386077, "learning_rate": 1.9492699526394e-06, "loss": 0.533, "step": 401 }, { "ETA": 5.22, "epoch": 0.12928123492522914, "fp16_scale": 1.0, "global_step": 402, "grad_norm": 2.2752919331626162, "learning_rate": 1.948941771516721e-06, "loss": 0.4066, "step": 402 }, { "ETA": 5.21, "epoch": 0.12960283003698345, "fp16_scale": 1.0, "global_step": 403, "grad_norm": 1.9498187127382178, "learning_rate": 1.9486125600918176e-06, "loss": 0.3798, "step": 403 }, { "ETA": 5.21, "epoch": 0.12992442514873773, "fp16_scale": 1.0, "global_step": 404, "grad_norm": 2.019174704757042, "learning_rate": 1.948282318722127e-06, "loss": 0.4407, "step": 404 }, { "ETA": 5.21, "epoch": 0.13024602026049203, "fp16_scale": 1.0, "global_step": 405, "grad_norm": 1.930174696471365, "learning_rate": 1.947951047766205e-06, "loss": 0.4836, "step": 405 }, { "ETA": 5.2, "epoch": 0.13056761537224634, "fp16_scale": 1.0, "global_step": 406, "grad_norm": 2.3780292591951055, "learning_rate": 1.9476187475837253e-06, "loss": 0.4406, "step": 406 }, { "ETA": 5.2, "epoch": 0.13088921048400065, "fp16_scale": 1.0, "global_step": 407, "grad_norm": 1.9749968561931892, "learning_rate": 1.947285418535479e-06, "loss": 0.4926, "step": 407 }, { "ETA": 5.2, "epoch": 0.13121080559575493, "fp16_scale": 1.0, "global_step": 408, "grad_norm": 1.9118323778875925, "learning_rate": 1.9469510609833736e-06, "loss": 0.4718, "step": 408 }, { "ETA": 5.2, "epoch": 0.13153240070750924, "fp16_scale": 1.0, "global_step": 409, "grad_norm": 1.9557921694287344, "learning_rate": 1.946615675290434e-06, "loss": 0.4796, "step": 409 }, { "ETA": 5.2, "epoch": 0.13185399581926355, "fp16_scale": 1.0, "global_step": 410, "grad_norm": 1.7568646908908685, "learning_rate": 1.9462792618208016e-06, "loss": 0.4673, "step": 410 }, { "ETA": 5.2, "epoch": 0.13217559093101786, "fp16_scale": 1.0, "global_step": 411, "grad_norm": 1.9691516740573827, "learning_rate": 1.945941820939733e-06, "loss": 0.5125, "step": 411 }, { "ETA": 5.2, "epoch": 0.13249718604277216, "fp16_scale": 1.0, "global_step": 412, "grad_norm": 2.1649453318010603, "learning_rate": 1.9456033530136006e-06, "loss": 0.5044, "step": 412 }, { "ETA": 5.19, "epoch": 0.13281878115452644, "fp16_scale": 1.0, "global_step": 413, "grad_norm": 2.1501165173009693, "learning_rate": 1.945263858409892e-06, "loss": 0.5147, "step": 413 }, { "ETA": 5.19, "epoch": 0.13314037626628075, "fp16_scale": 1.0, "global_step": 414, "grad_norm": 2.1142807297947805, "learning_rate": 1.9449233374972092e-06, "loss": 0.5006, "step": 414 }, { "ETA": 5.19, "epoch": 0.13346197137803506, "fp16_scale": 1.0, "global_step": 415, "grad_norm": 1.937485606893142, "learning_rate": 1.9445817906452695e-06, "loss": 0.4537, "step": 415 }, { "ETA": 5.19, "epoch": 0.13378356648978937, "fp16_scale": 1.0, "global_step": 416, "grad_norm": 2.1203546732040595, "learning_rate": 1.944239218224902e-06, "loss": 0.4673, "step": 416 }, { "ETA": 5.19, "epoch": 0.13410516160154365, "fp16_scale": 1.0, "global_step": 417, "grad_norm": 1.9626627677598947, "learning_rate": 1.9438956206080523e-06, "loss": 0.3959, "step": 417 }, { "ETA": 5.19, "epoch": 0.13442675671329796, "fp16_scale": 1.0, "global_step": 418, "grad_norm": 1.947392905660049, "learning_rate": 1.943550998167776e-06, "loss": 0.59, "step": 418 }, { "ETA": 5.19, "epoch": 0.13474835182505226, "fp16_scale": 1.0, "global_step": 419, "grad_norm": 2.2259788409758157, "learning_rate": 1.9432053512782435e-06, "loss": 0.4874, "step": 419 }, { "ETA": 5.18, "epoch": 0.13506994693680657, "fp16_scale": 1.0, "global_step": 420, "grad_norm": 2.325649234666594, "learning_rate": 1.9428586803147364e-06, "loss": 0.5795, "step": 420 }, { "ETA": 5.18, "epoch": 0.13539154204856085, "fp16_scale": 1.0, "global_step": 421, "grad_norm": 2.0260415473338163, "learning_rate": 1.942510985653649e-06, "loss": 0.4914, "step": 421 }, { "ETA": 5.18, "epoch": 0.13571313716031516, "fp16_scale": 1.0, "global_step": 422, "grad_norm": 1.898521737983934, "learning_rate": 1.942162267672486e-06, "loss": 0.4755, "step": 422 }, { "ETA": 5.18, "epoch": 0.13603473227206947, "fp16_scale": 1.0, "global_step": 423, "grad_norm": 2.144860138023159, "learning_rate": 1.941812526749865e-06, "loss": 0.5083, "step": 423 }, { "ETA": 5.18, "epoch": 0.13635632738382378, "fp16_scale": 1.0, "global_step": 424, "grad_norm": 1.99456776136853, "learning_rate": 1.9414617632655112e-06, "loss": 0.4749, "step": 424 }, { "ETA": 5.18, "epoch": 0.13667792249557806, "fp16_scale": 1.0, "global_step": 425, "grad_norm": 2.254945105453509, "learning_rate": 1.9411099776002635e-06, "loss": 0.5435, "step": 425 }, { "ETA": 5.18, "epoch": 0.13699951760733237, "fp16_scale": 1.0, "global_step": 426, "grad_norm": 1.9695960187702708, "learning_rate": 1.940757170136068e-06, "loss": 0.4438, "step": 426 }, { "ETA": 5.18, "epoch": 0.13732111271908667, "fp16_scale": 1.0, "global_step": 427, "grad_norm": 2.0018331783259615, "learning_rate": 1.9404033412559825e-06, "loss": 0.4976, "step": 427 }, { "ETA": 5.17, "epoch": 0.13764270783084098, "fp16_scale": 1.0, "global_step": 428, "grad_norm": 1.9666528999917499, "learning_rate": 1.940048491344171e-06, "loss": 0.4785, "step": 428 }, { "ETA": 5.17, "epoch": 0.13796430294259526, "fp16_scale": 1.0, "global_step": 429, "grad_norm": 1.8885843306751877, "learning_rate": 1.9396926207859082e-06, "loss": 0.5042, "step": 429 }, { "ETA": 5.17, "epoch": 0.13828589805434957, "fp16_scale": 1.0, "global_step": 430, "grad_norm": 1.9135829325509608, "learning_rate": 1.9393357299675764e-06, "loss": 0.5285, "step": 430 }, { "ETA": 5.17, "epoch": 0.13860749316610388, "fp16_scale": 1.0, "global_step": 431, "grad_norm": 2.038493470995957, "learning_rate": 1.9389778192766656e-06, "loss": 0.4968, "step": 431 }, { "ETA": 5.17, "epoch": 0.1389290882778582, "fp16_scale": 1.0, "global_step": 432, "grad_norm": 1.8705922440534466, "learning_rate": 1.938618889101773e-06, "loss": 0.4157, "step": 432 }, { "ETA": 5.17, "epoch": 0.13925068338961247, "fp16_scale": 1.0, "global_step": 433, "grad_norm": 1.7352670425372603, "learning_rate": 1.938258939832602e-06, "loss": 0.4729, "step": 433 }, { "ETA": 5.17, "epoch": 0.13957227850136678, "fp16_scale": 1.0, "global_step": 434, "grad_norm": 2.4242807468324874, "learning_rate": 1.9378979718599642e-06, "loss": 0.4365, "step": 434 }, { "ETA": 5.16, "epoch": 0.13989387361312108, "fp16_scale": 1.0, "global_step": 435, "grad_norm": 1.916994664393028, "learning_rate": 1.9375359855757766e-06, "loss": 0.4091, "step": 435 }, { "ETA": 5.16, "epoch": 0.1402154687248754, "fp16_scale": 1.0, "global_step": 436, "grad_norm": 1.8881238474894722, "learning_rate": 1.9371729813730604e-06, "loss": 0.4938, "step": 436 }, { "ETA": 5.16, "epoch": 0.14053706383662967, "fp16_scale": 1.0, "global_step": 437, "grad_norm": 2.1349766080230346, "learning_rate": 1.9368089596459438e-06, "loss": 0.4765, "step": 437 }, { "ETA": 5.16, "epoch": 0.14085865894838398, "fp16_scale": 1.0, "global_step": 438, "grad_norm": 2.127160685881295, "learning_rate": 1.936443920789658e-06, "loss": 0.5, "step": 438 }, { "ETA": 5.15, "epoch": 0.1411802540601383, "fp16_scale": 1.0, "global_step": 439, "grad_norm": 1.8930260580501126, "learning_rate": 1.9360778652005414e-06, "loss": 0.4855, "step": 439 }, { "ETA": 5.15, "epoch": 0.1415018491718926, "fp16_scale": 1.0, "global_step": 440, "grad_norm": 1.9548799762103741, "learning_rate": 1.9357107932760332e-06, "loss": 0.4879, "step": 440 }, { "ETA": 5.15, "epoch": 0.14182344428364688, "fp16_scale": 1.0, "global_step": 441, "grad_norm": 1.9317495953477675, "learning_rate": 1.9353427054146774e-06, "loss": 0.5375, "step": 441 }, { "ETA": 5.16, "epoch": 0.14214503939540118, "fp16_scale": 1.0, "global_step": 442, "grad_norm": 1.9857593253761028, "learning_rate": 1.934973602016122e-06, "loss": 0.4163, "step": 442 }, { "ETA": 5.15, "epoch": 0.1424666345071555, "fp16_scale": 1.0, "global_step": 443, "grad_norm": 1.9173337263435726, "learning_rate": 1.9346034834811153e-06, "loss": 0.452, "step": 443 }, { "ETA": 5.15, "epoch": 0.1427882296189098, "fp16_scale": 1.0, "global_step": 444, "grad_norm": 2.181530341125949, "learning_rate": 1.93423235021151e-06, "loss": 0.3778, "step": 444 }, { "ETA": 5.14, "epoch": 0.14310982473066408, "fp16_scale": 1.0, "global_step": 445, "grad_norm": 1.9846459794648879, "learning_rate": 1.9338602026102594e-06, "loss": 0.3937, "step": 445 }, { "ETA": 5.14, "epoch": 0.1434314198424184, "fp16_scale": 1.0, "global_step": 446, "grad_norm": 1.8759579249811038, "learning_rate": 1.9334870410814178e-06, "loss": 0.4015, "step": 446 }, { "ETA": 5.13, "epoch": 0.1437530149541727, "fp16_scale": 1.0, "global_step": 447, "grad_norm": 1.8976000883677417, "learning_rate": 1.9331128660301417e-06, "loss": 0.3701, "step": 447 }, { "ETA": 5.13, "epoch": 0.144074610065927, "fp16_scale": 1.0, "global_step": 448, "grad_norm": 2.0830861561346796, "learning_rate": 1.932737677862687e-06, "loss": 0.4587, "step": 448 }, { "ETA": 5.13, "epoch": 0.14439620517768129, "fp16_scale": 1.0, "global_step": 449, "grad_norm": 2.023843607690428, "learning_rate": 1.932361476986409e-06, "loss": 0.5054, "step": 449 }, { "ETA": 5.13, "epoch": 0.1447178002894356, "fp16_scale": 1.0, "global_step": 450, "grad_norm": 2.096781199048923, "learning_rate": 1.9319842638097644e-06, "loss": 0.3842, "step": 450 }, { "ETA": 5.13, "epoch": 0.1450393954011899, "fp16_scale": 1.0, "global_step": 451, "grad_norm": 1.9321513122728664, "learning_rate": 1.9316060387423074e-06, "loss": 0.5316, "step": 451 }, { "ETA": 5.12, "epoch": 0.1453609905129442, "fp16_scale": 1.0, "global_step": 452, "grad_norm": 2.3279603223291074, "learning_rate": 1.9312268021946916e-06, "loss": 0.4583, "step": 452 }, { "ETA": 5.12, "epoch": 0.1456825856246985, "fp16_scale": 1.0, "global_step": 453, "grad_norm": 2.014031977512335, "learning_rate": 1.9308465545786682e-06, "loss": 0.4189, "step": 453 }, { "ETA": 5.11, "epoch": 0.1460041807364528, "fp16_scale": 1.0, "global_step": 454, "grad_norm": 7.418229393654079, "learning_rate": 1.9304652963070866e-06, "loss": 0.4562, "step": 454 }, { "ETA": 5.11, "epoch": 0.1463257758482071, "fp16_scale": 1.0, "global_step": 455, "grad_norm": 2.0383858891895215, "learning_rate": 1.9300830277938934e-06, "loss": 0.3989, "step": 455 }, { "ETA": 5.11, "epoch": 0.14664737095996142, "fp16_scale": 1.0, "global_step": 456, "grad_norm": 1.9722056588404926, "learning_rate": 1.9296997494541327e-06, "loss": 0.4759, "step": 456 }, { "ETA": 5.11, "epoch": 0.14696896607171572, "fp16_scale": 1.0, "global_step": 457, "grad_norm": 2.1272312848730106, "learning_rate": 1.9293154617039436e-06, "loss": 0.4357, "step": 457 }, { "ETA": 5.11, "epoch": 0.14729056118347, "fp16_scale": 1.0, "global_step": 458, "grad_norm": 1.8574626919046473, "learning_rate": 1.928930164960562e-06, "loss": 0.4691, "step": 458 }, { "ETA": 5.11, "epoch": 0.1476121562952243, "fp16_scale": 1.0, "global_step": 459, "grad_norm": 1.9651718600396562, "learning_rate": 1.92854385964232e-06, "loss": 0.4896, "step": 459 }, { "ETA": 5.11, "epoch": 0.14793375140697862, "fp16_scale": 1.0, "global_step": 460, "grad_norm": 1.9352993455502014, "learning_rate": 1.9281565461686436e-06, "loss": 0.4551, "step": 460 }, { "ETA": 5.1, "epoch": 0.14825534651873293, "fp16_scale": 1.0, "global_step": 461, "grad_norm": 1.912081992082377, "learning_rate": 1.9277682249600533e-06, "loss": 0.5035, "step": 461 }, { "ETA": 5.1, "epoch": 0.1485769416304872, "fp16_scale": 1.0, "global_step": 462, "grad_norm": 2.0499940009129007, "learning_rate": 1.9273788964381647e-06, "loss": 0.4614, "step": 462 }, { "ETA": 5.1, "epoch": 0.14889853674224152, "fp16_scale": 1.0, "global_step": 463, "grad_norm": 2.010885292855413, "learning_rate": 1.9269885610256865e-06, "loss": 0.4487, "step": 463 }, { "ETA": 5.1, "epoch": 0.14922013185399582, "fp16_scale": 1.0, "global_step": 464, "grad_norm": 2.0314188890871083, "learning_rate": 1.926597219146421e-06, "loss": 0.475, "step": 464 }, { "ETA": 5.1, "epoch": 0.14954172696575013, "fp16_scale": 1.0, "global_step": 465, "grad_norm": 1.9700836175690337, "learning_rate": 1.9262048712252623e-06, "loss": 0.478, "step": 465 }, { "ETA": 5.1, "epoch": 0.1498633220775044, "fp16_scale": 1.0, "global_step": 466, "grad_norm": 2.071677060071096, "learning_rate": 1.925811517688198e-06, "loss": 0.4219, "step": 466 }, { "ETA": 5.1, "epoch": 0.15018491718925872, "fp16_scale": 1.0, "global_step": 467, "grad_norm": 1.846185694632217, "learning_rate": 1.9254171589623074e-06, "loss": 0.4181, "step": 467 }, { "ETA": 5.1, "epoch": 0.15050651230101303, "fp16_scale": 1.0, "global_step": 468, "grad_norm": 1.8185100096356368, "learning_rate": 1.92502179547576e-06, "loss": 0.3918, "step": 468 }, { "ETA": 5.1, "epoch": 0.15082810741276734, "fp16_scale": 1.0, "global_step": 469, "grad_norm": 2.23559095566495, "learning_rate": 1.9246254276578174e-06, "loss": 0.5203, "step": 469 }, { "ETA": 5.09, "epoch": 0.15114970252452162, "fp16_scale": 1.0, "global_step": 470, "grad_norm": 2.0903476148768294, "learning_rate": 1.924228055938831e-06, "loss": 0.4333, "step": 470 }, { "ETA": 5.09, "epoch": 0.15147129763627593, "fp16_scale": 1.0, "global_step": 471, "grad_norm": 2.6044925679999684, "learning_rate": 1.9238296807502427e-06, "loss": 0.4802, "step": 471 }, { "ETA": 5.09, "epoch": 0.15179289274803023, "fp16_scale": 1.0, "global_step": 472, "grad_norm": 1.9959524898824763, "learning_rate": 1.9234303025245833e-06, "loss": 0.4816, "step": 472 }, { "ETA": 5.08, "epoch": 0.15211448785978454, "fp16_scale": 1.0, "global_step": 473, "grad_norm": 1.8932233597555206, "learning_rate": 1.9230299216954734e-06, "loss": 0.4265, "step": 473 }, { "ETA": 5.08, "epoch": 0.15243608297153882, "fp16_scale": 1.0, "global_step": 474, "grad_norm": 1.9164271497723384, "learning_rate": 1.922628538697621e-06, "loss": 0.4487, "step": 474 }, { "ETA": 5.08, "epoch": 0.15275767808329313, "fp16_scale": 1.0, "global_step": 475, "grad_norm": 1.936569233446201, "learning_rate": 1.922226153966824e-06, "loss": 0.4762, "step": 475 }, { "ETA": 5.08, "epoch": 0.15307927319504744, "fp16_scale": 1.0, "global_step": 476, "grad_norm": 2.027042737818172, "learning_rate": 1.9218227679399657e-06, "loss": 0.4988, "step": 476 }, { "ETA": 5.08, "epoch": 0.15340086830680175, "fp16_scale": 1.0, "global_step": 477, "grad_norm": 1.972876234639711, "learning_rate": 1.921418381055018e-06, "loss": 0.5094, "step": 477 }, { "ETA": 5.08, "epoch": 0.15372246341855603, "fp16_scale": 1.0, "global_step": 478, "grad_norm": 1.9803998308295407, "learning_rate": 1.92101299375104e-06, "loss": 0.4616, "step": 478 }, { "ETA": 5.08, "epoch": 0.15404405853031033, "fp16_scale": 1.0, "global_step": 479, "grad_norm": 2.032838491790377, "learning_rate": 1.920606606468175e-06, "loss": 0.4205, "step": 479 }, { "ETA": 5.08, "epoch": 0.15436565364206464, "fp16_scale": 1.0, "global_step": 480, "grad_norm": 2.111802314751689, "learning_rate": 1.9201992196476533e-06, "loss": 0.4826, "step": 480 }, { "ETA": 5.07, "epoch": 0.15468724875381895, "fp16_scale": 1.0, "global_step": 481, "grad_norm": 2.117507142649786, "learning_rate": 1.919790833731791e-06, "loss": 0.3695, "step": 481 }, { "ETA": 5.06, "epoch": 0.15500884386557323, "fp16_scale": 1.0, "global_step": 482, "grad_norm": 2.1214152384528373, "learning_rate": 1.919381449163988e-06, "loss": 0.4471, "step": 482 }, { "ETA": 5.06, "epoch": 0.15533043897732754, "fp16_scale": 1.0, "global_step": 483, "grad_norm": 2.095435501434622, "learning_rate": 1.9189710663887276e-06, "loss": 0.4821, "step": 483 }, { "ETA": 5.06, "epoch": 0.15565203408908185, "fp16_scale": 1.0, "global_step": 484, "grad_norm": 2.03919483012636, "learning_rate": 1.9185596858515797e-06, "loss": 0.3305, "step": 484 }, { "ETA": 5.05, "epoch": 0.15597362920083616, "fp16_scale": 1.0, "global_step": 485, "grad_norm": 2.057899679469343, "learning_rate": 1.918147307999195e-06, "loss": 0.4001, "step": 485 }, { "ETA": 5.05, "epoch": 0.15629522431259044, "fp16_scale": 1.0, "global_step": 486, "grad_norm": 2.181468956952193, "learning_rate": 1.9177339332793075e-06, "loss": 0.3925, "step": 486 }, { "ETA": 5.04, "epoch": 0.15661681942434474, "fp16_scale": 1.0, "global_step": 487, "grad_norm": 1.9415284085155948, "learning_rate": 1.917319562140735e-06, "loss": 0.4425, "step": 487 }, { "ETA": 5.04, "epoch": 0.15693841453609905, "fp16_scale": 1.0, "global_step": 488, "grad_norm": 2.0426162218981725, "learning_rate": 1.9169041950333747e-06, "loss": 0.4202, "step": 488 }, { "ETA": 5.04, "epoch": 0.15726000964785336, "fp16_scale": 1.0, "global_step": 489, "grad_norm": 1.7610985955624534, "learning_rate": 1.9164878324082073e-06, "loss": 0.4983, "step": 489 }, { "ETA": 5.04, "epoch": 0.15758160475960764, "fp16_scale": 1.0, "global_step": 490, "grad_norm": 1.830034328847447, "learning_rate": 1.9160704747172933e-06, "loss": 0.4906, "step": 490 }, { "ETA": 5.04, "epoch": 0.15790319987136195, "fp16_scale": 1.0, "global_step": 491, "grad_norm": 1.985103108443925, "learning_rate": 1.9156521224137742e-06, "loss": 0.4479, "step": 491 }, { "ETA": 5.04, "epoch": 0.15822479498311626, "fp16_scale": 1.0, "global_step": 492, "grad_norm": 1.9678965370654322, "learning_rate": 1.91523277595187e-06, "loss": 0.4031, "step": 492 }, { "ETA": 5.04, "epoch": 0.15854639009487057, "fp16_scale": 1.0, "global_step": 493, "grad_norm": 1.8801773585155792, "learning_rate": 1.9148124357868828e-06, "loss": 0.4581, "step": 493 }, { "ETA": 5.04, "epoch": 0.15886798520662485, "fp16_scale": 1.0, "global_step": 494, "grad_norm": 2.0481246439049863, "learning_rate": 1.9143911023751902e-06, "loss": 0.4102, "step": 494 }, { "ETA": 5.03, "epoch": 0.15918958031837915, "fp16_scale": 1.0, "global_step": 495, "grad_norm": 2.100424592359454, "learning_rate": 1.9139687761742512e-06, "loss": 0.4851, "step": 495 }, { "ETA": 5.03, "epoch": 0.15951117543013346, "fp16_scale": 1.0, "global_step": 496, "grad_norm": 2.0182661530368824, "learning_rate": 1.9135454576426007e-06, "loss": 0.4697, "step": 496 }, { "ETA": 5.03, "epoch": 0.15983277054188777, "fp16_scale": 1.0, "global_step": 497, "grad_norm": 2.442367034410736, "learning_rate": 1.913121147239852e-06, "loss": 0.4239, "step": 497 }, { "ETA": 5.03, "epoch": 0.16015436565364208, "fp16_scale": 1.0, "global_step": 498, "grad_norm": 2.114188022740362, "learning_rate": 1.9126958454266954e-06, "loss": 0.4819, "step": 498 }, { "ETA": 5.03, "epoch": 0.16047596076539636, "fp16_scale": 1.0, "global_step": 499, "grad_norm": 2.166891992796271, "learning_rate": 1.9122695526648967e-06, "loss": 0.5122, "step": 499 }, { "ETA": 5.03, "epoch": 0.16079755587715067, "fp16_scale": 1.0, "global_step": 500, "grad_norm": 2.0689448466980225, "learning_rate": 1.9118422694172984e-06, "loss": 0.4343, "step": 500 }, { "ETA": 5.03, "epoch": 0.16111915098890497, "fp16_scale": 1.0, "global_step": 501, "grad_norm": 1.9182557739692911, "learning_rate": 1.9114139961478182e-06, "loss": 0.3912, "step": 501 }, { "ETA": 5.03, "epoch": 0.16144074610065928, "fp16_scale": 1.0, "global_step": 502, "grad_norm": 2.023205269994096, "learning_rate": 1.910984733321449e-06, "loss": 0.4111, "step": 502 }, { "ETA": 5.02, "epoch": 0.16176234121241356, "fp16_scale": 1.0, "global_step": 503, "grad_norm": 1.8019197922059353, "learning_rate": 1.9105544814042574e-06, "loss": 0.4212, "step": 503 }, { "ETA": 5.02, "epoch": 0.16208393632416787, "fp16_scale": 1.0, "global_step": 504, "grad_norm": 1.997365979153901, "learning_rate": 1.9101232408633842e-06, "loss": 0.5027, "step": 504 }, { "ETA": 5.02, "epoch": 0.16240553143592218, "fp16_scale": 1.0, "global_step": 505, "grad_norm": 2.1495963330876875, "learning_rate": 1.9096910121670443e-06, "loss": 0.3767, "step": 505 }, { "ETA": 5.01, "epoch": 0.1627271265476765, "fp16_scale": 1.0, "global_step": 506, "grad_norm": 2.1017123576575787, "learning_rate": 1.909257795784524e-06, "loss": 0.4223, "step": 506 }, { "ETA": 5.01, "epoch": 0.16304872165943077, "fp16_scale": 1.0, "global_step": 507, "grad_norm": 1.8670850009944489, "learning_rate": 1.9088235921861836e-06, "loss": 0.4779, "step": 507 }, { "ETA": 5.01, "epoch": 0.16337031677118508, "fp16_scale": 1.0, "global_step": 508, "grad_norm": 2.1192504020841043, "learning_rate": 1.908388401843454e-06, "loss": 0.4734, "step": 508 }, { "ETA": 5.01, "epoch": 0.16369191188293938, "fp16_scale": 1.0, "global_step": 509, "grad_norm": 2.0041831162050276, "learning_rate": 1.9079522252288387e-06, "loss": 0.4586, "step": 509 }, { "ETA": 5.0, "epoch": 0.1640135069946937, "fp16_scale": 1.0, "global_step": 510, "grad_norm": 2.1063411475899234, "learning_rate": 1.90751506281591e-06, "loss": 0.4009, "step": 510 }, { "ETA": 5.0, "epoch": 0.16433510210644797, "fp16_scale": 1.0, "global_step": 511, "grad_norm": 2.569817215911691, "learning_rate": 1.9070769150793127e-06, "loss": 0.5017, "step": 511 }, { "ETA": 5.0, "epoch": 0.16465669721820228, "fp16_scale": 1.0, "global_step": 512, "grad_norm": 1.8061384404289822, "learning_rate": 1.9066377824947603e-06, "loss": 0.4823, "step": 512 }, { "ETA": 5.0, "epoch": 0.1649782923299566, "fp16_scale": 1.0, "global_step": 513, "grad_norm": 2.246681058475501, "learning_rate": 1.9061976655390354e-06, "loss": 0.3808, "step": 513 }, { "ETA": 4.99, "epoch": 0.1652998874417109, "fp16_scale": 1.0, "global_step": 514, "grad_norm": 2.2809229794345414, "learning_rate": 1.9057565646899905e-06, "loss": 0.373, "step": 514 }, { "ETA": 4.99, "epoch": 0.16562148255346518, "fp16_scale": 1.0, "global_step": 515, "grad_norm": 2.24739897262686, "learning_rate": 1.9053144804265448e-06, "loss": 0.4451, "step": 515 }, { "ETA": 4.98, "epoch": 0.16594307766521948, "fp16_scale": 1.0, "global_step": 516, "grad_norm": 1.9156854430478418, "learning_rate": 1.9048714132286867e-06, "loss": 0.4203, "step": 516 }, { "ETA": 4.98, "epoch": 0.1662646727769738, "fp16_scale": 1.0, "global_step": 517, "grad_norm": 1.926065867868701, "learning_rate": 1.9044273635774704e-06, "loss": 0.438, "step": 517 }, { "ETA": 4.97, "epoch": 0.1665862678887281, "fp16_scale": 1.0, "global_step": 518, "grad_norm": 2.4219151111659167, "learning_rate": 1.903982331955018e-06, "loss": 0.4361, "step": 518 }, { "ETA": 4.97, "epoch": 0.16690786300048238, "fp16_scale": 1.0, "global_step": 519, "grad_norm": 1.9730124219605953, "learning_rate": 1.9035363188445178e-06, "loss": 0.4027, "step": 519 }, { "ETA": 4.97, "epoch": 0.1672294581122367, "fp16_scale": 1.0, "global_step": 520, "grad_norm": 1.9555521797448514, "learning_rate": 1.9030893247302217e-06, "loss": 0.4653, "step": 520 }, { "ETA": 4.96, "epoch": 0.167551053223991, "fp16_scale": 1.0, "global_step": 521, "grad_norm": 1.9937224078125575, "learning_rate": 1.90264135009745e-06, "loss": 0.5008, "step": 521 }, { "ETA": 4.96, "epoch": 0.1678726483357453, "fp16_scale": 1.0, "global_step": 522, "grad_norm": 2.000642568224559, "learning_rate": 1.9021923954325844e-06, "loss": 0.5437, "step": 522 }, { "ETA": 4.96, "epoch": 0.1681942434474996, "fp16_scale": 1.0, "global_step": 523, "grad_norm": 1.9943212517219038, "learning_rate": 1.901742461223073e-06, "loss": 0.4717, "step": 523 }, { "ETA": 4.96, "epoch": 0.1685158385592539, "fp16_scale": 1.0, "global_step": 524, "grad_norm": 2.122839075375804, "learning_rate": 1.9012915479574262e-06, "loss": 0.485, "step": 524 }, { "ETA": 4.96, "epoch": 0.1688374336710082, "fp16_scale": 1.0, "global_step": 525, "grad_norm": 1.9468881722832705, "learning_rate": 1.900839656125217e-06, "loss": 0.4064, "step": 525 }, { "ETA": 4.96, "epoch": 0.1691590287827625, "fp16_scale": 1.0, "global_step": 526, "grad_norm": 2.1150096020163605, "learning_rate": 1.900386786217083e-06, "loss": 0.4396, "step": 526 }, { "ETA": 4.96, "epoch": 0.1694806238945168, "fp16_scale": 1.0, "global_step": 527, "grad_norm": 2.012879701644392, "learning_rate": 1.8999329387247215e-06, "loss": 0.4797, "step": 527 }, { "ETA": 4.95, "epoch": 0.1698022190062711, "fp16_scale": 1.0, "global_step": 528, "grad_norm": 2.031934706209426, "learning_rate": 1.8994781141408919e-06, "loss": 0.4477, "step": 528 }, { "ETA": 4.95, "epoch": 0.1701238141180254, "fp16_scale": 1.0, "global_step": 529, "grad_norm": 2.03686582745526, "learning_rate": 1.8990223129594145e-06, "loss": 0.4597, "step": 529 }, { "ETA": 4.95, "epoch": 0.17044540922977972, "fp16_scale": 1.0, "global_step": 530, "grad_norm": 1.8402927637301583, "learning_rate": 1.8985655356751703e-06, "loss": 0.4299, "step": 530 }, { "ETA": 4.95, "epoch": 0.170767004341534, "fp16_scale": 1.0, "global_step": 531, "grad_norm": 2.1026640221571515, "learning_rate": 1.8981077827841e-06, "loss": 0.4799, "step": 531 }, { "ETA": 4.95, "epoch": 0.1710885994532883, "fp16_scale": 1.0, "global_step": 532, "grad_norm": 1.919984508497446, "learning_rate": 1.8976490547832032e-06, "loss": 0.4408, "step": 532 }, { "ETA": 4.95, "epoch": 0.1714101945650426, "fp16_scale": 1.0, "global_step": 533, "grad_norm": 2.0140479254351025, "learning_rate": 1.897189352170538e-06, "loss": 0.3843, "step": 533 }, { "ETA": 4.94, "epoch": 0.17173178967679692, "fp16_scale": 1.0, "global_step": 534, "grad_norm": 2.223144969945552, "learning_rate": 1.8967286754452212e-06, "loss": 0.4629, "step": 534 }, { "ETA": 4.94, "epoch": 0.1720533847885512, "fp16_scale": 1.0, "global_step": 535, "grad_norm": 2.2632666917839246, "learning_rate": 1.8962670251074274e-06, "loss": 0.4977, "step": 535 }, { "ETA": 4.94, "epoch": 0.1723749799003055, "fp16_scale": 1.0, "global_step": 536, "grad_norm": 2.2717337535407816, "learning_rate": 1.8958044016583874e-06, "loss": 0.4284, "step": 536 }, { "ETA": 4.94, "epoch": 0.17269657501205982, "fp16_scale": 1.0, "global_step": 537, "grad_norm": 2.13873199798984, "learning_rate": 1.8953408056003896e-06, "loss": 0.415, "step": 537 }, { "ETA": 4.93, "epoch": 0.17301817012381412, "fp16_scale": 1.0, "global_step": 538, "grad_norm": 1.9101124996443284, "learning_rate": 1.8948762374367778e-06, "loss": 0.4479, "step": 538 }, { "ETA": 4.93, "epoch": 0.17333976523556843, "fp16_scale": 1.0, "global_step": 539, "grad_norm": 2.1281584409565477, "learning_rate": 1.8944106976719512e-06, "loss": 0.4806, "step": 539 }, { "ETA": 4.93, "epoch": 0.1736613603473227, "fp16_scale": 1.0, "global_step": 540, "grad_norm": 2.0622123122804354, "learning_rate": 1.893944186811364e-06, "loss": 0.4699, "step": 540 }, { "ETA": 4.93, "epoch": 0.17398295545907702, "fp16_scale": 1.0, "global_step": 541, "grad_norm": 2.041120255261446, "learning_rate": 1.8934767053615247e-06, "loss": 0.4499, "step": 541 }, { "ETA": 4.93, "epoch": 0.17430455057083133, "fp16_scale": 1.0, "global_step": 542, "grad_norm": 2.0870200132391044, "learning_rate": 1.8930082538299965e-06, "loss": 0.4764, "step": 542 }, { "ETA": 4.93, "epoch": 0.17462614568258564, "fp16_scale": 1.0, "global_step": 543, "grad_norm": 2.41639846747191, "learning_rate": 1.892538832725394e-06, "loss": 0.3978, "step": 543 }, { "ETA": 4.93, "epoch": 0.17494774079433992, "fp16_scale": 1.0, "global_step": 544, "grad_norm": 2.035605720561539, "learning_rate": 1.8920684425573864e-06, "loss": 0.4566, "step": 544 }, { "ETA": 4.92, "epoch": 0.17526933590609423, "fp16_scale": 1.0, "global_step": 545, "grad_norm": 1.83310911668132, "learning_rate": 1.891597083836694e-06, "loss": 0.5362, "step": 545 }, { "ETA": 4.92, "epoch": 0.17559093101784853, "fp16_scale": 1.0, "global_step": 546, "grad_norm": 1.896095684151234, "learning_rate": 1.8911247570750883e-06, "loss": 0.4819, "step": 546 }, { "ETA": 4.92, "epoch": 0.17591252612960284, "fp16_scale": 1.0, "global_step": 547, "grad_norm": 1.8853474029996398, "learning_rate": 1.8906514627853934e-06, "loss": 0.4873, "step": 547 }, { "ETA": 4.92, "epoch": 0.17623412124135712, "fp16_scale": 1.0, "global_step": 548, "grad_norm": 1.9571717149678636, "learning_rate": 1.8901772014814822e-06, "loss": 0.5372, "step": 548 }, { "ETA": 4.92, "epoch": 0.17655571635311143, "fp16_scale": 1.0, "global_step": 549, "grad_norm": 1.8769314230870588, "learning_rate": 1.889701973678279e-06, "loss": 0.4166, "step": 549 }, { "ETA": 4.92, "epoch": 0.17687731146486574, "fp16_scale": 1.0, "global_step": 550, "grad_norm": 1.926745259518635, "learning_rate": 1.8892257798917557e-06, "loss": 0.4563, "step": 550 }, { "ETA": 4.92, "epoch": 0.17719890657662005, "fp16_scale": 1.0, "global_step": 551, "grad_norm": 2.02103573757482, "learning_rate": 1.888748620638935e-06, "loss": 0.4151, "step": 551 }, { "ETA": 4.91, "epoch": 0.17752050168837433, "fp16_scale": 1.0, "global_step": 552, "grad_norm": 1.885756779896396, "learning_rate": 1.8882704964378867e-06, "loss": 0.4884, "step": 552 }, { "ETA": 4.91, "epoch": 0.17784209680012864, "fp16_scale": 1.0, "global_step": 553, "grad_norm": 2.0146509941279396, "learning_rate": 1.8877914078077279e-06, "loss": 0.3924, "step": 553 }, { "ETA": 4.91, "epoch": 0.17816369191188294, "fp16_scale": 1.0, "global_step": 554, "grad_norm": 2.4579834422780604, "learning_rate": 1.8873113552686237e-06, "loss": 0.4015, "step": 554 }, { "ETA": 4.9, "epoch": 0.17848528702363725, "fp16_scale": 1.0, "global_step": 555, "grad_norm": 1.934591746997279, "learning_rate": 1.8868303393417856e-06, "loss": 0.4506, "step": 555 }, { "ETA": 4.9, "epoch": 0.17880688213539153, "fp16_scale": 1.0, "global_step": 556, "grad_norm": 1.7235134069580018, "learning_rate": 1.8863483605494706e-06, "loss": 0.4576, "step": 556 }, { "ETA": 4.9, "epoch": 0.17912847724714584, "fp16_scale": 1.0, "global_step": 557, "grad_norm": 2.0894967580717423, "learning_rate": 1.8858654194149816e-06, "loss": 0.5248, "step": 557 }, { "ETA": 4.9, "epoch": 0.17945007235890015, "fp16_scale": 1.0, "global_step": 558, "grad_norm": 2.097258628227925, "learning_rate": 1.8853815164626666e-06, "loss": 0.4648, "step": 558 }, { "ETA": 4.9, "epoch": 0.17977166747065446, "fp16_scale": 1.0, "global_step": 559, "grad_norm": 1.8403413298853255, "learning_rate": 1.8848966522179167e-06, "loss": 0.4175, "step": 559 }, { "ETA": 4.89, "epoch": 0.18009326258240874, "fp16_scale": 1.0, "global_step": 560, "grad_norm": 2.0927307686172423, "learning_rate": 1.8844108272071679e-06, "loss": 0.3805, "step": 560 }, { "ETA": 4.89, "epoch": 0.18041485769416304, "fp16_scale": 1.0, "global_step": 561, "grad_norm": 2.131386571404521, "learning_rate": 1.8839240419578987e-06, "loss": 0.3927, "step": 561 }, { "ETA": 4.89, "epoch": 0.18073645280591735, "fp16_scale": 1.0, "global_step": 562, "grad_norm": 1.826343865639814, "learning_rate": 1.8834362969986307e-06, "loss": 0.4488, "step": 562 }, { "ETA": 4.89, "epoch": 0.18105804791767166, "fp16_scale": 1.0, "global_step": 563, "grad_norm": 1.948149057105855, "learning_rate": 1.8829475928589268e-06, "loss": 0.5413, "step": 563 }, { "ETA": 4.89, "epoch": 0.18137964302942594, "fp16_scale": 1.0, "global_step": 564, "grad_norm": 1.968769807335462, "learning_rate": 1.8824579300693922e-06, "loss": 0.5736, "step": 564 }, { "ETA": 4.89, "epoch": 0.18170123814118025, "fp16_scale": 1.0, "global_step": 565, "grad_norm": 1.8844946208572746, "learning_rate": 1.881967309161672e-06, "loss": 0.4289, "step": 565 }, { "ETA": 4.88, "epoch": 0.18202283325293456, "fp16_scale": 1.0, "global_step": 566, "grad_norm": 1.903822174394183, "learning_rate": 1.881475730668452e-06, "loss": 0.4828, "step": 566 }, { "ETA": 4.88, "epoch": 0.18234442836468887, "fp16_scale": 1.0, "global_step": 567, "grad_norm": 2.2108039620773954, "learning_rate": 1.880983195123458e-06, "loss": 0.5257, "step": 567 }, { "ETA": 4.88, "epoch": 0.18266602347644315, "fp16_scale": 1.0, "global_step": 568, "grad_norm": 2.099351752581281, "learning_rate": 1.8804897030614546e-06, "loss": 0.4821, "step": 568 }, { "ETA": 4.88, "epoch": 0.18298761858819745, "fp16_scale": 1.0, "global_step": 569, "grad_norm": 1.9382447625923098, "learning_rate": 1.8799952550182445e-06, "loss": 0.4487, "step": 569 }, { "ETA": 4.88, "epoch": 0.18330921369995176, "fp16_scale": 1.0, "global_step": 570, "grad_norm": 2.2681362181489053, "learning_rate": 1.879499851530669e-06, "loss": 0.6149, "step": 570 }, { "ETA": 4.88, "epoch": 0.18363080881170607, "fp16_scale": 1.0, "global_step": 571, "grad_norm": 1.940133081860052, "learning_rate": 1.879003493136607e-06, "loss": 0.4497, "step": 571 }, { "ETA": 4.88, "epoch": 0.18395240392346035, "fp16_scale": 1.0, "global_step": 572, "grad_norm": 2.0599883726403987, "learning_rate": 1.8785061803749729e-06, "loss": 0.4778, "step": 572 }, { "ETA": 4.88, "epoch": 0.18427399903521466, "fp16_scale": 1.0, "global_step": 573, "grad_norm": 2.110329637174342, "learning_rate": 1.8780079137857188e-06, "loss": 0.4481, "step": 573 }, { "ETA": 4.88, "epoch": 0.18459559414696897, "fp16_scale": 1.0, "global_step": 574, "grad_norm": 1.919884947344211, "learning_rate": 1.877508693909831e-06, "loss": 0.492, "step": 574 }, { "ETA": 4.87, "epoch": 0.18491718925872327, "fp16_scale": 1.0, "global_step": 575, "grad_norm": 1.8663129640547262, "learning_rate": 1.8770085212893326e-06, "loss": 0.3873, "step": 575 }, { "ETA": 4.87, "epoch": 0.18523878437047755, "fp16_scale": 1.0, "global_step": 576, "grad_norm": 2.1952302394856975, "learning_rate": 1.876507396467279e-06, "loss": 0.3941, "step": 576 }, { "ETA": 4.87, "epoch": 0.18556037948223186, "fp16_scale": 1.0, "global_step": 577, "grad_norm": 2.0533985048962893, "learning_rate": 1.8760053199877607e-06, "loss": 0.3433, "step": 577 }, { "ETA": 4.86, "epoch": 0.18588197459398617, "fp16_scale": 1.0, "global_step": 578, "grad_norm": 2.0182071082471293, "learning_rate": 1.8755022923959016e-06, "loss": 0.474, "step": 578 }, { "ETA": 4.86, "epoch": 0.18620356970574048, "fp16_scale": 1.0, "global_step": 579, "grad_norm": 2.002732610511696, "learning_rate": 1.874998314237858e-06, "loss": 0.469, "step": 579 }, { "ETA": 4.86, "epoch": 0.1865251648174948, "fp16_scale": 1.0, "global_step": 580, "grad_norm": 2.039418867547622, "learning_rate": 1.8744933860608182e-06, "loss": 0.4198, "step": 580 }, { "ETA": 4.86, "epoch": 0.18684675992924907, "fp16_scale": 1.0, "global_step": 581, "grad_norm": 1.8329540055828422, "learning_rate": 1.873987508413001e-06, "loss": 0.4763, "step": 581 }, { "ETA": 4.85, "epoch": 0.18716835504100338, "fp16_scale": 1.0, "global_step": 582, "grad_norm": 1.8742753050198495, "learning_rate": 1.8734806818436582e-06, "loss": 0.4124, "step": 582 }, { "ETA": 4.85, "epoch": 0.18748995015275768, "fp16_scale": 1.0, "global_step": 583, "grad_norm": 1.9397192271287038, "learning_rate": 1.8729729069030702e-06, "loss": 0.505, "step": 583 }, { "ETA": 4.85, "epoch": 0.187811545264512, "fp16_scale": 1.0, "global_step": 584, "grad_norm": 2.108304627582822, "learning_rate": 1.8724641841425478e-06, "loss": 0.4052, "step": 584 }, { "ETA": 4.85, "epoch": 0.18813314037626627, "fp16_scale": 1.0, "global_step": 585, "grad_norm": 1.9067581604657606, "learning_rate": 1.8719545141144305e-06, "loss": 0.4544, "step": 585 }, { "ETA": 4.84, "epoch": 0.18845473548802058, "fp16_scale": 1.0, "global_step": 586, "grad_norm": 1.892836923763899, "learning_rate": 1.8714438973720866e-06, "loss": 0.3964, "step": 586 }, { "ETA": 4.84, "epoch": 0.1887763305997749, "fp16_scale": 1.0, "global_step": 587, "grad_norm": 1.984445559124549, "learning_rate": 1.8709323344699116e-06, "loss": 0.428, "step": 587 }, { "ETA": 4.84, "epoch": 0.1890979257115292, "fp16_scale": 1.0, "global_step": 588, "grad_norm": 2.2199260725468366, "learning_rate": 1.8704198259633297e-06, "loss": 0.396, "step": 588 }, { "ETA": 4.83, "epoch": 0.18941952082328348, "fp16_scale": 1.0, "global_step": 589, "grad_norm": 1.772486612676109, "learning_rate": 1.8699063724087903e-06, "loss": 0.3727, "step": 589 }, { "ETA": 4.83, "epoch": 0.18974111593503779, "fp16_scale": 1.0, "global_step": 590, "grad_norm": 1.926100516202449, "learning_rate": 1.8693919743637697e-06, "loss": 0.3861, "step": 590 }, { "ETA": 4.83, "epoch": 0.1900627110467921, "fp16_scale": 1.0, "global_step": 591, "grad_norm": 2.3916796754464613, "learning_rate": 1.8688766323867694e-06, "loss": 0.366, "step": 591 }, { "ETA": 4.83, "epoch": 0.1903843061585464, "fp16_scale": 1.0, "global_step": 592, "grad_norm": 1.9010824784128724, "learning_rate": 1.8683603470373156e-06, "loss": 0.5022, "step": 592 }, { "ETA": 4.83, "epoch": 0.19070590127030068, "fp16_scale": 1.0, "global_step": 593, "grad_norm": 2.0597054389558402, "learning_rate": 1.8678431188759594e-06, "loss": 0.4443, "step": 593 }, { "ETA": 4.82, "epoch": 0.191027496382055, "fp16_scale": 1.0, "global_step": 594, "grad_norm": 1.7884018761750782, "learning_rate": 1.867324948464275e-06, "loss": 0.4631, "step": 594 }, { "ETA": 4.82, "epoch": 0.1913490914938093, "fp16_scale": 1.0, "global_step": 595, "grad_norm": 1.8868484963061303, "learning_rate": 1.8668058363648595e-06, "loss": 0.4437, "step": 595 }, { "ETA": 4.82, "epoch": 0.1916706866055636, "fp16_scale": 1.0, "global_step": 596, "grad_norm": 2.095772690216961, "learning_rate": 1.866285783141333e-06, "loss": 0.4201, "step": 596 }, { "ETA": 4.82, "epoch": 0.1919922817173179, "fp16_scale": 1.0, "global_step": 597, "grad_norm": 1.8552325554382783, "learning_rate": 1.8657647893583367e-06, "loss": 0.4808, "step": 597 }, { "ETA": 4.82, "epoch": 0.1923138768290722, "fp16_scale": 1.0, "global_step": 598, "grad_norm": 1.9484783011443043, "learning_rate": 1.8652428555815337e-06, "loss": 0.3913, "step": 598 }, { "ETA": 4.82, "epoch": 0.1926354719408265, "fp16_scale": 1.0, "global_step": 599, "grad_norm": 2.058521572514809, "learning_rate": 1.8647199823776075e-06, "loss": 0.4762, "step": 599 }, { "ETA": 4.82, "epoch": 0.1929570670525808, "fp16_scale": 1.0, "global_step": 600, "grad_norm": 2.1264069711297107, "learning_rate": 1.864196170314261e-06, "loss": 0.4808, "step": 600 }, { "ETA": 4.85, "epoch": 0.1932786621643351, "fp16_scale": 1.0, "global_step": 601, "grad_norm": 1.732500748250544, "learning_rate": 1.8636714199602173e-06, "loss": 0.4653, "step": 601 }, { "ETA": 4.84, "epoch": 0.1936002572760894, "fp16_scale": 1.0, "global_step": 602, "grad_norm": 2.1013280544007005, "learning_rate": 1.8631457318852176e-06, "loss": 0.3684, "step": 602 }, { "ETA": 4.84, "epoch": 0.1939218523878437, "fp16_scale": 1.0, "global_step": 603, "grad_norm": 1.9749589587766276, "learning_rate": 1.8626191066600216e-06, "loss": 0.4051, "step": 603 }, { "ETA": 4.84, "epoch": 0.19424344749959802, "fp16_scale": 1.0, "global_step": 604, "grad_norm": 2.175927882974133, "learning_rate": 1.8620915448564066e-06, "loss": 0.416, "step": 604 }, { "ETA": 4.84, "epoch": 0.1945650426113523, "fp16_scale": 1.0, "global_step": 605, "grad_norm": 2.031438401343737, "learning_rate": 1.861563047047166e-06, "loss": 0.4195, "step": 605 }, { "ETA": 4.83, "epoch": 0.1948866377231066, "fp16_scale": 1.0, "global_step": 606, "grad_norm": 2.183574618551414, "learning_rate": 1.8610336138061102e-06, "loss": 0.4374, "step": 606 }, { "ETA": 4.83, "epoch": 0.1952082328348609, "fp16_scale": 1.0, "global_step": 607, "grad_norm": 2.2343993713422217, "learning_rate": 1.8605032457080652e-06, "loss": 0.4276, "step": 607 }, { "ETA": 4.83, "epoch": 0.19552982794661522, "fp16_scale": 1.0, "global_step": 608, "grad_norm": 1.6430057100439763, "learning_rate": 1.859971943328872e-06, "loss": 0.3886, "step": 608 }, { "ETA": 4.83, "epoch": 0.1958514230583695, "fp16_scale": 1.0, "global_step": 609, "grad_norm": 1.9117037960619865, "learning_rate": 1.8594397072453853e-06, "loss": 0.4074, "step": 609 }, { "ETA": 4.82, "epoch": 0.1961730181701238, "fp16_scale": 1.0, "global_step": 610, "grad_norm": 2.421864106905433, "learning_rate": 1.8589065380354745e-06, "loss": 0.4771, "step": 610 }, { "ETA": 4.82, "epoch": 0.19649461328187812, "fp16_scale": 1.0, "global_step": 611, "grad_norm": 2.4002527375885623, "learning_rate": 1.8583724362780212e-06, "loss": 0.3781, "step": 611 }, { "ETA": 4.82, "epoch": 0.19681620839363242, "fp16_scale": 1.0, "global_step": 612, "grad_norm": 1.9699545619938954, "learning_rate": 1.857837402552921e-06, "loss": 0.4131, "step": 612 }, { "ETA": 4.81, "epoch": 0.1971378035053867, "fp16_scale": 1.0, "global_step": 613, "grad_norm": 2.2752226907372233, "learning_rate": 1.8573014374410795e-06, "loss": 0.5096, "step": 613 }, { "ETA": 4.81, "epoch": 0.197459398617141, "fp16_scale": 1.0, "global_step": 614, "grad_norm": 1.8314450152414132, "learning_rate": 1.8567645415244148e-06, "loss": 0.3465, "step": 614 }, { "ETA": 4.81, "epoch": 0.19778099372889532, "fp16_scale": 1.0, "global_step": 615, "grad_norm": 1.8977891548584482, "learning_rate": 1.8562267153858552e-06, "loss": 0.5017, "step": 615 }, { "ETA": 4.8, "epoch": 0.19810258884064963, "fp16_scale": 1.0, "global_step": 616, "grad_norm": 2.0420930643646806, "learning_rate": 1.8556879596093391e-06, "loss": 0.4094, "step": 616 }, { "ETA": 4.8, "epoch": 0.1984241839524039, "fp16_scale": 1.0, "global_step": 617, "grad_norm": 2.0549159698304513, "learning_rate": 1.8551482747798141e-06, "loss": 0.4693, "step": 617 }, { "ETA": 4.8, "epoch": 0.19874577906415822, "fp16_scale": 1.0, "global_step": 618, "grad_norm": 1.879700336166331, "learning_rate": 1.8546076614832365e-06, "loss": 0.4828, "step": 618 }, { "ETA": 4.8, "epoch": 0.19906737417591253, "fp16_scale": 1.0, "global_step": 619, "grad_norm": 1.8261289028259504, "learning_rate": 1.8540661203065707e-06, "loss": 0.4695, "step": 619 }, { "ETA": 4.8, "epoch": 0.19938896928766683, "fp16_scale": 1.0, "global_step": 620, "grad_norm": 1.8866623882949907, "learning_rate": 1.853523651837789e-06, "loss": 0.4323, "step": 620 }, { "ETA": 4.79, "epoch": 0.19971056439942114, "fp16_scale": 1.0, "global_step": 621, "grad_norm": 2.1287253170862184, "learning_rate": 1.8529802566658692e-06, "loss": 0.4183, "step": 621 }, { "ETA": 4.79, "epoch": 0.20003215951117542, "fp16_scale": 1.0, "global_step": 622, "grad_norm": 1.8618208672413505, "learning_rate": 1.852435935380797e-06, "loss": 0.4658, "step": 622 }, { "ETA": 4.78, "epoch": 0.20035375462292973, "fp16_scale": 1.0, "global_step": 623, "grad_norm": 2.254753764323054, "learning_rate": 1.8518906885735625e-06, "loss": 0.4352, "step": 623 }, { "ETA": 4.78, "epoch": 0.20067534973468404, "fp16_scale": 1.0, "global_step": 624, "grad_norm": 2.010192488268173, "learning_rate": 1.851344516836161e-06, "loss": 0.4339, "step": 624 }, { "ETA": 4.78, "epoch": 0.20099694484643835, "fp16_scale": 1.0, "global_step": 625, "grad_norm": 1.9849676704988954, "learning_rate": 1.8507974207615916e-06, "loss": 0.465, "step": 625 }, { "ETA": 4.78, "epoch": 0.20131853995819263, "fp16_scale": 1.0, "global_step": 626, "grad_norm": 1.8809543144615208, "learning_rate": 1.8502494009438576e-06, "loss": 0.5547, "step": 626 }, { "ETA": 4.77, "epoch": 0.20164013506994694, "fp16_scale": 1.0, "global_step": 627, "grad_norm": 2.1958689145152044, "learning_rate": 1.849700457977965e-06, "loss": 0.418, "step": 627 }, { "ETA": 4.77, "epoch": 0.20196173018170124, "fp16_scale": 1.0, "global_step": 628, "grad_norm": 2.1420083149279634, "learning_rate": 1.8491505924599223e-06, "loss": 0.4698, "step": 628 }, { "ETA": 4.77, "epoch": 0.20228332529345555, "fp16_scale": 1.0, "global_step": 629, "grad_norm": 2.182804317877393, "learning_rate": 1.8485998049867386e-06, "loss": 0.3843, "step": 629 }, { "ETA": 4.76, "epoch": 0.20260492040520983, "fp16_scale": 1.0, "global_step": 630, "grad_norm": 2.1621189673234813, "learning_rate": 1.8480480961564257e-06, "loss": 0.4753, "step": 630 }, { "ETA": 4.76, "epoch": 0.20292651551696414, "fp16_scale": 1.0, "global_step": 631, "grad_norm": 2.3352945651312056, "learning_rate": 1.8474954665679948e-06, "loss": 0.5026, "step": 631 }, { "ETA": 4.76, "epoch": 0.20324811062871845, "fp16_scale": 1.0, "global_step": 632, "grad_norm": 1.8208689508829023, "learning_rate": 1.8469419168214566e-06, "loss": 0.4212, "step": 632 }, { "ETA": 4.76, "epoch": 0.20356970574047276, "fp16_scale": 1.0, "global_step": 633, "grad_norm": 1.9206588859967546, "learning_rate": 1.8463874475178214e-06, "loss": 0.4007, "step": 633 }, { "ETA": 4.76, "epoch": 0.20389130085222704, "fp16_scale": 1.0, "global_step": 634, "grad_norm": 1.9366640673580708, "learning_rate": 1.8458320592590973e-06, "loss": 0.4894, "step": 634 }, { "ETA": 4.76, "epoch": 0.20421289596398134, "fp16_scale": 1.0, "global_step": 635, "grad_norm": 1.9962884829129268, "learning_rate": 1.8452757526482907e-06, "loss": 0.4686, "step": 635 }, { "ETA": 4.76, "epoch": 0.20453449107573565, "fp16_scale": 1.0, "global_step": 636, "grad_norm": 1.8664658036605462, "learning_rate": 1.8447185282894049e-06, "loss": 0.4387, "step": 636 }, { "ETA": 4.75, "epoch": 0.20485608618748996, "fp16_scale": 1.0, "global_step": 637, "grad_norm": 1.7841863488835676, "learning_rate": 1.8441603867874397e-06, "loss": 0.4318, "step": 637 }, { "ETA": 4.75, "epoch": 0.20517768129924424, "fp16_scale": 1.0, "global_step": 638, "grad_norm": 2.0306722810093936, "learning_rate": 1.8436013287483902e-06, "loss": 0.4738, "step": 638 }, { "ETA": 4.75, "epoch": 0.20549927641099855, "fp16_scale": 1.0, "global_step": 639, "grad_norm": 2.137654186818178, "learning_rate": 1.843041354779247e-06, "loss": 0.3841, "step": 639 }, { "ETA": 4.74, "epoch": 0.20582087152275286, "fp16_scale": 1.0, "global_step": 640, "grad_norm": 2.0265562481011754, "learning_rate": 1.8424804654879961e-06, "loss": 0.4185, "step": 640 }, { "ETA": 4.74, "epoch": 0.20614246663450717, "fp16_scale": 1.0, "global_step": 641, "grad_norm": 1.8130403454687563, "learning_rate": 1.8419186614836153e-06, "loss": 0.3894, "step": 641 }, { "ETA": 4.74, "epoch": 0.20646406174626145, "fp16_scale": 1.0, "global_step": 642, "grad_norm": 2.009846041145111, "learning_rate": 1.841355943376077e-06, "loss": 0.4266, "step": 642 }, { "ETA": 4.74, "epoch": 0.20678565685801575, "fp16_scale": 1.0, "global_step": 643, "grad_norm": 2.006466947197548, "learning_rate": 1.840792311776346e-06, "loss": 0.3877, "step": 643 }, { "ETA": 4.74, "epoch": 0.20710725196977006, "fp16_scale": 1.0, "global_step": 644, "grad_norm": 1.9422981714149796, "learning_rate": 1.840227767296378e-06, "loss": 0.4504, "step": 644 }, { "ETA": 4.73, "epoch": 0.20742884708152437, "fp16_scale": 1.0, "global_step": 645, "grad_norm": 1.9018256870200687, "learning_rate": 1.8396623105491207e-06, "loss": 0.4425, "step": 645 }, { "ETA": 4.73, "epoch": 0.20775044219327865, "fp16_scale": 1.0, "global_step": 646, "grad_norm": 2.027288659656249, "learning_rate": 1.839095942148512e-06, "loss": 0.3987, "step": 646 }, { "ETA": 4.73, "epoch": 0.20807203730503296, "fp16_scale": 1.0, "global_step": 647, "grad_norm": 1.872799097561611, "learning_rate": 1.8385286627094797e-06, "loss": 0.4947, "step": 647 }, { "ETA": 4.72, "epoch": 0.20839363241678727, "fp16_scale": 1.0, "global_step": 648, "grad_norm": 2.111928113572892, "learning_rate": 1.8379604728479407e-06, "loss": 0.4198, "step": 648 }, { "ETA": 4.72, "epoch": 0.20871522752854157, "fp16_scale": 1.0, "global_step": 649, "grad_norm": 1.9399081155702231, "learning_rate": 1.8373913731808006e-06, "loss": 0.4158, "step": 649 }, { "ETA": 4.71, "epoch": 0.20903682264029586, "fp16_scale": 1.0, "global_step": 650, "grad_norm": 2.196310553347871, "learning_rate": 1.836821364325952e-06, "loss": 0.3836, "step": 650 }, { "ETA": 4.71, "epoch": 0.20935841775205016, "fp16_scale": 1.0, "global_step": 651, "grad_norm": 2.119228948550605, "learning_rate": 1.8362504469022757e-06, "loss": 0.427, "step": 651 }, { "ETA": 4.71, "epoch": 0.20968001286380447, "fp16_scale": 1.0, "global_step": 652, "grad_norm": 2.0040639594431044, "learning_rate": 1.8356786215296384e-06, "loss": 0.4517, "step": 652 }, { "ETA": 4.71, "epoch": 0.21000160797555878, "fp16_scale": 1.0, "global_step": 653, "grad_norm": 2.072695395984966, "learning_rate": 1.8351058888288927e-06, "loss": 0.4989, "step": 653 }, { "ETA": 4.71, "epoch": 0.21032320308731306, "fp16_scale": 1.0, "global_step": 654, "grad_norm": 2.162234855537016, "learning_rate": 1.8345322494218762e-06, "loss": 0.5036, "step": 654 }, { "ETA": 4.7, "epoch": 0.21064479819906737, "fp16_scale": 1.0, "global_step": 655, "grad_norm": 2.2823715720576274, "learning_rate": 1.8339577039314111e-06, "loss": 0.3904, "step": 655 }, { "ETA": 4.7, "epoch": 0.21096639331082168, "fp16_scale": 1.0, "global_step": 656, "grad_norm": 2.3729771485791655, "learning_rate": 1.8333822529813032e-06, "loss": 0.464, "step": 656 }, { "ETA": 4.7, "epoch": 0.21128798842257598, "fp16_scale": 1.0, "global_step": 657, "grad_norm": 2.155816059326467, "learning_rate": 1.8328058971963416e-06, "loss": 0.4146, "step": 657 }, { "ETA": 4.7, "epoch": 0.21160958353433026, "fp16_scale": 1.0, "global_step": 658, "grad_norm": 1.9841320210831421, "learning_rate": 1.8322286372022982e-06, "loss": 0.4693, "step": 658 }, { "ETA": 4.69, "epoch": 0.21193117864608457, "fp16_scale": 1.0, "global_step": 659, "grad_norm": 2.1265935844277317, "learning_rate": 1.8316504736259254e-06, "loss": 0.5274, "step": 659 }, { "ETA": 4.69, "epoch": 0.21225277375783888, "fp16_scale": 1.0, "global_step": 660, "grad_norm": 2.084041465792925, "learning_rate": 1.8310714070949578e-06, "loss": 0.4314, "step": 660 }, { "ETA": 4.69, "epoch": 0.2125743688695932, "fp16_scale": 1.0, "global_step": 661, "grad_norm": 2.490711656110001, "learning_rate": 1.8304914382381098e-06, "loss": 0.4212, "step": 661 }, { "ETA": 4.69, "epoch": 0.2128959639813475, "fp16_scale": 1.0, "global_step": 662, "grad_norm": 2.170150924977779, "learning_rate": 1.8299105676850757e-06, "loss": 0.4744, "step": 662 }, { "ETA": 4.69, "epoch": 0.21321755909310178, "fp16_scale": 1.0, "global_step": 663, "grad_norm": 1.9143635362599047, "learning_rate": 1.8293287960665294e-06, "loss": 0.3939, "step": 663 }, { "ETA": 4.69, "epoch": 0.21353915420485609, "fp16_scale": 1.0, "global_step": 664, "grad_norm": 1.9950231918545598, "learning_rate": 1.8287461240141215e-06, "loss": 0.5137, "step": 664 }, { "ETA": 4.68, "epoch": 0.2138607493166104, "fp16_scale": 1.0, "global_step": 665, "grad_norm": 1.9897696769840771, "learning_rate": 1.828162552160482e-06, "loss": 0.4226, "step": 665 }, { "ETA": 4.68, "epoch": 0.2141823444283647, "fp16_scale": 1.0, "global_step": 666, "grad_norm": 1.9982248356650532, "learning_rate": 1.827578081139217e-06, "loss": 0.4246, "step": 666 }, { "ETA": 4.67, "epoch": 0.21450393954011898, "fp16_scale": 1.0, "global_step": 667, "grad_norm": 2.267752882574821, "learning_rate": 1.8269927115849084e-06, "loss": 0.4222, "step": 667 }, { "ETA": 4.67, "epoch": 0.2148255346518733, "fp16_scale": 1.0, "global_step": 668, "grad_norm": 2.1412154715946783, "learning_rate": 1.8264064441331147e-06, "loss": 0.4113, "step": 668 }, { "ETA": 4.67, "epoch": 0.2151471297636276, "fp16_scale": 1.0, "global_step": 669, "grad_norm": 2.231901326579392, "learning_rate": 1.8258192794203686e-06, "loss": 0.5034, "step": 669 }, { "ETA": 4.66, "epoch": 0.2154687248753819, "fp16_scale": 1.0, "global_step": 670, "grad_norm": 2.065323137925775, "learning_rate": 1.8252312180841776e-06, "loss": 0.3939, "step": 670 }, { "ETA": 4.66, "epoch": 0.2157903199871362, "fp16_scale": 1.0, "global_step": 671, "grad_norm": 1.8352790986920346, "learning_rate": 1.8246422607630217e-06, "loss": 0.5134, "step": 671 }, { "ETA": 4.66, "epoch": 0.2161119150988905, "fp16_scale": 1.0, "global_step": 672, "grad_norm": 1.8676361680015254, "learning_rate": 1.8240524080963548e-06, "loss": 0.4833, "step": 672 }, { "ETA": 4.66, "epoch": 0.2164335102106448, "fp16_scale": 1.0, "global_step": 673, "grad_norm": 1.7359585046809156, "learning_rate": 1.823461660724602e-06, "loss": 0.454, "step": 673 }, { "ETA": 4.65, "epoch": 0.2167551053223991, "fp16_scale": 1.0, "global_step": 674, "grad_norm": 2.5898757152168645, "learning_rate": 1.8228700192891605e-06, "loss": 0.4056, "step": 674 }, { "ETA": 4.65, "epoch": 0.2170767004341534, "fp16_scale": 1.0, "global_step": 675, "grad_norm": 1.7924015547754597, "learning_rate": 1.8222774844323978e-06, "loss": 0.4143, "step": 675 }, { "ETA": 4.65, "epoch": 0.2173982955459077, "fp16_scale": 1.0, "global_step": 676, "grad_norm": 1.9921557615333092, "learning_rate": 1.8216840567976516e-06, "loss": 0.4321, "step": 676 }, { "ETA": 4.65, "epoch": 0.217719890657662, "fp16_scale": 1.0, "global_step": 677, "grad_norm": 2.031491140076144, "learning_rate": 1.821089737029229e-06, "loss": 0.4886, "step": 677 }, { "ETA": 4.64, "epoch": 0.21804148576941632, "fp16_scale": 1.0, "global_step": 678, "grad_norm": 2.066052422348665, "learning_rate": 1.8204945257724057e-06, "loss": 0.3586, "step": 678 }, { "ETA": 4.64, "epoch": 0.2183630808811706, "fp16_scale": 1.0, "global_step": 679, "grad_norm": 2.112887004982494, "learning_rate": 1.8198984236734245e-06, "loss": 0.3591, "step": 679 }, { "ETA": 4.64, "epoch": 0.2186846759929249, "fp16_scale": 1.0, "global_step": 680, "grad_norm": 1.8330286025539153, "learning_rate": 1.819301431379497e-06, "loss": 0.4298, "step": 680 }, { "ETA": 4.64, "epoch": 0.2190062711046792, "fp16_scale": 1.0, "global_step": 681, "grad_norm": 2.059414327645457, "learning_rate": 1.8187035495388e-06, "loss": 0.3943, "step": 681 }, { "ETA": 4.64, "epoch": 0.21932786621643352, "fp16_scale": 1.0, "global_step": 682, "grad_norm": 1.9002396763580478, "learning_rate": 1.8181047788004768e-06, "loss": 0.4064, "step": 682 }, { "ETA": 4.63, "epoch": 0.2196494613281878, "fp16_scale": 1.0, "global_step": 683, "grad_norm": 1.9827519447925184, "learning_rate": 1.8175051198146356e-06, "loss": 0.5063, "step": 683 }, { "ETA": 4.63, "epoch": 0.2199710564399421, "fp16_scale": 1.0, "global_step": 684, "grad_norm": 2.0601645455000637, "learning_rate": 1.816904573232349e-06, "loss": 0.445, "step": 684 }, { "ETA": 4.63, "epoch": 0.22029265155169642, "fp16_scale": 1.0, "global_step": 685, "grad_norm": 1.8331284872167692, "learning_rate": 1.8163031397056531e-06, "loss": 0.4083, "step": 685 }, { "ETA": 4.63, "epoch": 0.22061424666345072, "fp16_scale": 1.0, "global_step": 686, "grad_norm": 3.296288761928108, "learning_rate": 1.8157008198875477e-06, "loss": 0.415, "step": 686 }, { "ETA": 4.63, "epoch": 0.220935841775205, "fp16_scale": 1.0, "global_step": 687, "grad_norm": 2.1112734371036406, "learning_rate": 1.8150976144319936e-06, "loss": 0.4791, "step": 687 }, { "ETA": 4.63, "epoch": 0.2212574368869593, "fp16_scale": 1.0, "global_step": 688, "grad_norm": 1.8901633853998032, "learning_rate": 1.8144935239939142e-06, "loss": 0.4727, "step": 688 }, { "ETA": 4.63, "epoch": 0.22157903199871362, "fp16_scale": 1.0, "global_step": 689, "grad_norm": 1.879120205442335, "learning_rate": 1.813888549229194e-06, "loss": 0.4175, "step": 689 }, { "ETA": 4.63, "epoch": 0.22190062711046793, "fp16_scale": 1.0, "global_step": 690, "grad_norm": 2.1020368460332337, "learning_rate": 1.8132826907946765e-06, "loss": 0.3883, "step": 690 }, { "ETA": 4.62, "epoch": 0.2222222222222222, "fp16_scale": 1.0, "global_step": 691, "grad_norm": 2.067374511395301, "learning_rate": 1.8126759493481658e-06, "loss": 0.4429, "step": 691 }, { "ETA": 4.62, "epoch": 0.22254381733397652, "fp16_scale": 1.0, "global_step": 692, "grad_norm": 1.9464387876385234, "learning_rate": 1.812068325548424e-06, "loss": 0.4954, "step": 692 }, { "ETA": 4.62, "epoch": 0.22286541244573083, "fp16_scale": 1.0, "global_step": 693, "grad_norm": 1.9262835125630478, "learning_rate": 1.811459820055171e-06, "loss": 0.4688, "step": 693 }, { "ETA": 4.62, "epoch": 0.22318700755748513, "fp16_scale": 1.0, "global_step": 694, "grad_norm": 2.1918110355029756, "learning_rate": 1.810850433529085e-06, "loss": 0.5037, "step": 694 }, { "ETA": 4.62, "epoch": 0.22350860266923941, "fp16_scale": 1.0, "global_step": 695, "grad_norm": 1.832957030289425, "learning_rate": 1.8102401666317996e-06, "loss": 0.5234, "step": 695 }, { "ETA": 4.62, "epoch": 0.22383019778099372, "fp16_scale": 1.0, "global_step": 696, "grad_norm": 1.8098859921784485, "learning_rate": 1.8096290200259055e-06, "loss": 0.4518, "step": 696 }, { "ETA": 4.62, "epoch": 0.22415179289274803, "fp16_scale": 1.0, "global_step": 697, "grad_norm": 2.048553780993463, "learning_rate": 1.8090169943749474e-06, "loss": 0.4365, "step": 697 }, { "ETA": 4.61, "epoch": 0.22447338800450234, "fp16_scale": 1.0, "global_step": 698, "grad_norm": 2.200686749413936, "learning_rate": 1.8084040903434248e-06, "loss": 0.3814, "step": 698 }, { "ETA": 4.61, "epoch": 0.22479498311625662, "fp16_scale": 1.0, "global_step": 699, "grad_norm": 1.924796579595174, "learning_rate": 1.807790308596791e-06, "loss": 0.4456, "step": 699 }, { "ETA": 4.61, "epoch": 0.22511657822801093, "fp16_scale": 1.0, "global_step": 700, "grad_norm": 1.709388799581436, "learning_rate": 1.8071756498014525e-06, "loss": 0.4432, "step": 700 }, { "ETA": 4.61, "epoch": 0.22543817333976524, "fp16_scale": 1.0, "global_step": 701, "grad_norm": 1.9075606175094577, "learning_rate": 1.8065601146247676e-06, "loss": 0.4508, "step": 701 }, { "ETA": 4.61, "epoch": 0.22575976845151954, "fp16_scale": 1.0, "global_step": 702, "grad_norm": 2.017820857885172, "learning_rate": 1.805943703735046e-06, "loss": 0.4179, "step": 702 }, { "ETA": 4.6, "epoch": 0.22608136356327385, "fp16_scale": 1.0, "global_step": 703, "grad_norm": 1.9259765770629065, "learning_rate": 1.8053264178015489e-06, "loss": 0.466, "step": 703 }, { "ETA": 4.6, "epoch": 0.22640295867502813, "fp16_scale": 1.0, "global_step": 704, "grad_norm": 1.9024356112618308, "learning_rate": 1.8047082574944867e-06, "loss": 0.46, "step": 704 }, { "ETA": 4.6, "epoch": 0.22672455378678244, "fp16_scale": 1.0, "global_step": 705, "grad_norm": 2.0781068091814423, "learning_rate": 1.8040892234850202e-06, "loss": 0.4756, "step": 705 }, { "ETA": 4.6, "epoch": 0.22704614889853675, "fp16_scale": 1.0, "global_step": 706, "grad_norm": 1.8396122753092925, "learning_rate": 1.8034693164452577e-06, "loss": 0.4244, "step": 706 }, { "ETA": 4.59, "epoch": 0.22736774401029106, "fp16_scale": 1.0, "global_step": 707, "grad_norm": 1.9992189970908851, "learning_rate": 1.802848537048256e-06, "loss": 0.3647, "step": 707 }, { "ETA": 4.59, "epoch": 0.22768933912204534, "fp16_scale": 1.0, "global_step": 708, "grad_norm": 2.1224032032365585, "learning_rate": 1.8022268859680194e-06, "loss": 0.4193, "step": 708 }, { "ETA": 4.59, "epoch": 0.22801093423379964, "fp16_scale": 1.0, "global_step": 709, "grad_norm": 2.1626700811466986, "learning_rate": 1.8016043638794973e-06, "loss": 0.4294, "step": 709 }, { "ETA": 4.59, "epoch": 0.22833252934555395, "fp16_scale": 1.0, "global_step": 710, "grad_norm": 1.9963804168815649, "learning_rate": 1.8009809714585863e-06, "loss": 0.4556, "step": 710 }, { "ETA": 4.59, "epoch": 0.22865412445730826, "fp16_scale": 1.0, "global_step": 711, "grad_norm": 1.820212185204622, "learning_rate": 1.800356709382127e-06, "loss": 0.4039, "step": 711 }, { "ETA": 4.58, "epoch": 0.22897571956906254, "fp16_scale": 1.0, "global_step": 712, "grad_norm": 1.965943253027829, "learning_rate": 1.7997315783279045e-06, "loss": 0.3419, "step": 712 }, { "ETA": 4.58, "epoch": 0.22929731468081685, "fp16_scale": 1.0, "global_step": 713, "grad_norm": 1.9727340324024272, "learning_rate": 1.7991055789746477e-06, "loss": 0.4642, "step": 713 }, { "ETA": 4.58, "epoch": 0.22961890979257116, "fp16_scale": 1.0, "global_step": 714, "grad_norm": 1.9682197257968195, "learning_rate": 1.7984787120020272e-06, "loss": 0.3678, "step": 714 }, { "ETA": 4.57, "epoch": 0.22994050490432547, "fp16_scale": 1.0, "global_step": 715, "grad_norm": 2.016209477905265, "learning_rate": 1.7978509780906577e-06, "loss": 0.4064, "step": 715 }, { "ETA": 4.57, "epoch": 0.23026210001607975, "fp16_scale": 1.0, "global_step": 716, "grad_norm": 1.9510813623596766, "learning_rate": 1.7972223779220922e-06, "loss": 0.4791, "step": 716 }, { "ETA": 4.57, "epoch": 0.23058369512783405, "fp16_scale": 1.0, "global_step": 717, "grad_norm": 1.9638577974984546, "learning_rate": 1.796592912178827e-06, "loss": 0.4723, "step": 717 }, { "ETA": 4.57, "epoch": 0.23090529023958836, "fp16_scale": 1.0, "global_step": 718, "grad_norm": 1.768505989368743, "learning_rate": 1.795962581544297e-06, "loss": 0.4377, "step": 718 }, { "ETA": 4.57, "epoch": 0.23122688535134267, "fp16_scale": 1.0, "global_step": 719, "grad_norm": 2.210067772931334, "learning_rate": 1.7953313867028755e-06, "loss": 0.5496, "step": 719 }, { "ETA": 4.56, "epoch": 0.23154848046309695, "fp16_scale": 1.0, "global_step": 720, "grad_norm": 1.8219268673341542, "learning_rate": 1.7946993283398748e-06, "loss": 0.4321, "step": 720 }, { "ETA": 4.56, "epoch": 0.23187007557485126, "fp16_scale": 1.0, "global_step": 721, "grad_norm": 2.004344691510899, "learning_rate": 1.7940664071415457e-06, "loss": 0.5344, "step": 721 }, { "ETA": 4.56, "epoch": 0.23219167068660557, "fp16_scale": 1.0, "global_step": 722, "grad_norm": 1.9855726746474036, "learning_rate": 1.793432623795074e-06, "loss": 0.4949, "step": 722 }, { "ETA": 4.56, "epoch": 0.23251326579835987, "fp16_scale": 1.0, "global_step": 723, "grad_norm": 2.0486130421428657, "learning_rate": 1.7927979789885824e-06, "loss": 0.4047, "step": 723 }, { "ETA": 4.56, "epoch": 0.23283486091011416, "fp16_scale": 1.0, "global_step": 724, "grad_norm": 1.9272440469802035, "learning_rate": 1.7921624734111291e-06, "loss": 0.4699, "step": 724 }, { "ETA": 4.56, "epoch": 0.23315645602186846, "fp16_scale": 1.0, "global_step": 725, "grad_norm": 1.9601881150311802, "learning_rate": 1.7915261077527072e-06, "loss": 0.3968, "step": 725 }, { "ETA": 4.56, "epoch": 0.23347805113362277, "fp16_scale": 1.0, "global_step": 726, "grad_norm": 2.0041671971661597, "learning_rate": 1.7908888827042424e-06, "loss": 0.4972, "step": 726 }, { "ETA": 4.56, "epoch": 0.23379964624537708, "fp16_scale": 1.0, "global_step": 727, "grad_norm": 1.9826428974386219, "learning_rate": 1.7902507989575947e-06, "loss": 0.399, "step": 727 }, { "ETA": 4.55, "epoch": 0.23412124135713136, "fp16_scale": 1.0, "global_step": 728, "grad_norm": 2.0330369293048265, "learning_rate": 1.7896118572055555e-06, "loss": 0.4726, "step": 728 }, { "ETA": 4.55, "epoch": 0.23444283646888567, "fp16_scale": 1.0, "global_step": 729, "grad_norm": 2.0163117339345944, "learning_rate": 1.7889720581418488e-06, "loss": 0.4936, "step": 729 }, { "ETA": 4.55, "epoch": 0.23476443158063998, "fp16_scale": 1.0, "global_step": 730, "grad_norm": 1.988671170173158, "learning_rate": 1.7883314024611284e-06, "loss": 0.4436, "step": 730 }, { "ETA": 4.55, "epoch": 0.23508602669239428, "fp16_scale": 1.0, "global_step": 731, "grad_norm": 2.0975771896673088, "learning_rate": 1.7876898908589787e-06, "loss": 0.4049, "step": 731 }, { "ETA": 4.55, "epoch": 0.23540762180414856, "fp16_scale": 1.0, "global_step": 732, "grad_norm": 2.082944961166679, "learning_rate": 1.787047524031913e-06, "loss": 0.4801, "step": 732 }, { "ETA": 4.55, "epoch": 0.23572921691590287, "fp16_scale": 1.0, "global_step": 733, "grad_norm": 1.899197242182982, "learning_rate": 1.7864043026773738e-06, "loss": 0.4641, "step": 733 }, { "ETA": 4.54, "epoch": 0.23605081202765718, "fp16_scale": 1.0, "global_step": 734, "grad_norm": 1.9972371822291792, "learning_rate": 1.7857602274937306e-06, "loss": 0.3807, "step": 734 }, { "ETA": 4.54, "epoch": 0.2363724071394115, "fp16_scale": 1.0, "global_step": 735, "grad_norm": 2.193975694579717, "learning_rate": 1.7851152991802808e-06, "loss": 0.3962, "step": 735 }, { "ETA": 4.54, "epoch": 0.23669400225116577, "fp16_scale": 1.0, "global_step": 736, "grad_norm": 1.9577948654527098, "learning_rate": 1.7844695184372473e-06, "loss": 0.4417, "step": 736 }, { "ETA": 4.53, "epoch": 0.23701559736292008, "fp16_scale": 1.0, "global_step": 737, "grad_norm": 1.8564902332589925, "learning_rate": 1.7838228859657794e-06, "loss": 0.436, "step": 737 }, { "ETA": 4.53, "epoch": 0.23733719247467439, "fp16_scale": 1.0, "global_step": 738, "grad_norm": 2.266637819531783, "learning_rate": 1.78317540246795e-06, "loss": 0.4337, "step": 738 }, { "ETA": 4.53, "epoch": 0.2376587875864287, "fp16_scale": 1.0, "global_step": 739, "grad_norm": 2.158320028995456, "learning_rate": 1.7825270686467567e-06, "loss": 0.5044, "step": 739 }, { "ETA": 4.53, "epoch": 0.23798038269818297, "fp16_scale": 1.0, "global_step": 740, "grad_norm": 2.119824332945789, "learning_rate": 1.7818778852061206e-06, "loss": 0.4589, "step": 740 }, { "ETA": 4.52, "epoch": 0.23830197780993728, "fp16_scale": 1.0, "global_step": 741, "grad_norm": 2.2978059185149107, "learning_rate": 1.7812278528508842e-06, "loss": 0.3989, "step": 741 }, { "ETA": 4.52, "epoch": 0.2386235729216916, "fp16_scale": 1.0, "global_step": 742, "grad_norm": 1.9956639370091604, "learning_rate": 1.7805769722868128e-06, "loss": 0.4349, "step": 742 }, { "ETA": 4.52, "epoch": 0.2389451680334459, "fp16_scale": 1.0, "global_step": 743, "grad_norm": 2.037044955159949, "learning_rate": 1.7799252442205926e-06, "loss": 0.4272, "step": 743 }, { "ETA": 4.52, "epoch": 0.2392667631452002, "fp16_scale": 1.0, "global_step": 744, "grad_norm": 1.8554231707091167, "learning_rate": 1.779272669359829e-06, "loss": 0.4673, "step": 744 }, { "ETA": 4.52, "epoch": 0.2395883582569545, "fp16_scale": 1.0, "global_step": 745, "grad_norm": 2.2053717811448412, "learning_rate": 1.7786192484130477e-06, "loss": 0.4457, "step": 745 }, { "ETA": 4.51, "epoch": 0.2399099533687088, "fp16_scale": 1.0, "global_step": 746, "grad_norm": 2.317068308385471, "learning_rate": 1.7779649820896925e-06, "loss": 0.4507, "step": 746 }, { "ETA": 4.51, "epoch": 0.2402315484804631, "fp16_scale": 1.0, "global_step": 747, "grad_norm": 2.2616330283114303, "learning_rate": 1.7773098711001255e-06, "loss": 0.3667, "step": 747 }, { "ETA": 4.51, "epoch": 0.2405531435922174, "fp16_scale": 1.0, "global_step": 748, "grad_norm": 2.139905482754699, "learning_rate": 1.7766539161556251e-06, "loss": 0.4098, "step": 748 }, { "ETA": 4.51, "epoch": 0.2408747387039717, "fp16_scale": 1.0, "global_step": 749, "grad_norm": 1.8311878172702902, "learning_rate": 1.7759971179683872e-06, "loss": 0.3987, "step": 749 }, { "ETA": 4.5, "epoch": 0.241196333815726, "fp16_scale": 1.0, "global_step": 750, "grad_norm": 2.138934959196834, "learning_rate": 1.7753394772515228e-06, "loss": 0.4585, "step": 750 }, { "ETA": 4.5, "epoch": 0.2415179289274803, "fp16_scale": 1.0, "global_step": 751, "grad_norm": 2.222212966993316, "learning_rate": 1.7746809947190567e-06, "loss": 0.3848, "step": 751 }, { "ETA": 4.5, "epoch": 0.24183952403923462, "fp16_scale": 1.0, "global_step": 752, "grad_norm": 2.1018816148178368, "learning_rate": 1.7740216710859288e-06, "loss": 0.4393, "step": 752 }, { "ETA": 4.5, "epoch": 0.2421611191509889, "fp16_scale": 1.0, "global_step": 753, "grad_norm": 1.9861261843879785, "learning_rate": 1.773361507067992e-06, "loss": 0.3987, "step": 753 }, { "ETA": 4.5, "epoch": 0.2424827142627432, "fp16_scale": 1.0, "global_step": 754, "grad_norm": 1.8913263997988305, "learning_rate": 1.7727005033820114e-06, "loss": 0.4597, "step": 754 }, { "ETA": 4.49, "epoch": 0.2428043093744975, "fp16_scale": 1.0, "global_step": 755, "grad_norm": 2.0630895122508397, "learning_rate": 1.7720386607456636e-06, "loss": 0.4939, "step": 755 }, { "ETA": 4.49, "epoch": 0.24312590448625182, "fp16_scale": 1.0, "global_step": 756, "grad_norm": 1.8784062689214744, "learning_rate": 1.7713759798775372e-06, "loss": 0.5216, "step": 756 }, { "ETA": 4.49, "epoch": 0.2434474995980061, "fp16_scale": 1.0, "global_step": 757, "grad_norm": 2.0242708323883543, "learning_rate": 1.7707124614971294e-06, "loss": 0.4566, "step": 757 }, { "ETA": 4.49, "epoch": 0.2437690947097604, "fp16_scale": 1.0, "global_step": 758, "grad_norm": 1.8976058234202664, "learning_rate": 1.770048106324847e-06, "loss": 0.4751, "step": 758 }, { "ETA": 4.49, "epoch": 0.24409068982151472, "fp16_scale": 1.0, "global_step": 759, "grad_norm": 2.1489217687260904, "learning_rate": 1.7693829150820067e-06, "loss": 0.3589, "step": 759 }, { "ETA": 4.49, "epoch": 0.24441228493326903, "fp16_scale": 1.0, "global_step": 760, "grad_norm": 1.8301097217537243, "learning_rate": 1.7687168884908314e-06, "loss": 0.4735, "step": 760 }, { "ETA": 4.48, "epoch": 0.2447338800450233, "fp16_scale": 1.0, "global_step": 761, "grad_norm": 1.9556663336829052, "learning_rate": 1.7680500272744515e-06, "loss": 0.4321, "step": 761 }, { "ETA": 4.48, "epoch": 0.2450554751567776, "fp16_scale": 1.0, "global_step": 762, "grad_norm": 2.0829984705854794, "learning_rate": 1.767382332156904e-06, "loss": 0.5225, "step": 762 }, { "ETA": 4.48, "epoch": 0.24537707026853192, "fp16_scale": 1.0, "global_step": 763, "grad_norm": 1.9064636228392189, "learning_rate": 1.7667138038631305e-06, "loss": 0.4515, "step": 763 }, { "ETA": 4.48, "epoch": 0.24569866538028623, "fp16_scale": 1.0, "global_step": 764, "grad_norm": 2.137222624168098, "learning_rate": 1.766044443118978e-06, "loss": 0.3982, "step": 764 }, { "ETA": 4.47, "epoch": 0.2460202604920405, "fp16_scale": 1.0, "global_step": 765, "grad_norm": 1.7814841302675692, "learning_rate": 1.7653742506511966e-06, "loss": 0.5006, "step": 765 }, { "ETA": 4.47, "epoch": 0.24634185560379482, "fp16_scale": 1.0, "global_step": 766, "grad_norm": 1.8475546846241377, "learning_rate": 1.7647032271874399e-06, "loss": 0.4, "step": 766 }, { "ETA": 4.47, "epoch": 0.24666345071554913, "fp16_scale": 1.0, "global_step": 767, "grad_norm": 2.145473176810697, "learning_rate": 1.7640313734562638e-06, "loss": 0.481, "step": 767 }, { "ETA": 4.47, "epoch": 0.24698504582730343, "fp16_scale": 1.0, "global_step": 768, "grad_norm": 1.922921093523383, "learning_rate": 1.7633586901871248e-06, "loss": 0.447, "step": 768 }, { "ETA": 4.47, "epoch": 0.24730664093905771, "fp16_scale": 1.0, "global_step": 769, "grad_norm": 2.0565372242087903, "learning_rate": 1.7626851781103818e-06, "loss": 0.4808, "step": 769 }, { "ETA": 4.47, "epoch": 0.24762823605081202, "fp16_scale": 1.0, "global_step": 770, "grad_norm": 1.9792795749250929, "learning_rate": 1.762010837957292e-06, "loss": 0.4895, "step": 770 }, { "ETA": 4.47, "epoch": 0.24794983116256633, "fp16_scale": 1.0, "global_step": 771, "grad_norm": 1.883691722510702, "learning_rate": 1.7613356704600121e-06, "loss": 0.4819, "step": 771 }, { "ETA": 4.46, "epoch": 0.24827142627432064, "fp16_scale": 1.0, "global_step": 772, "grad_norm": 2.2677718519687144, "learning_rate": 1.7606596763515972e-06, "loss": 0.392, "step": 772 }, { "ETA": 4.46, "epoch": 0.24859302138607492, "fp16_scale": 1.0, "global_step": 773, "grad_norm": 2.1000510484291746, "learning_rate": 1.7599828563660001e-06, "loss": 0.4982, "step": 773 }, { "ETA": 4.46, "epoch": 0.24891461649782923, "fp16_scale": 1.0, "global_step": 774, "grad_norm": 1.9092489891093536, "learning_rate": 1.75930521123807e-06, "loss": 0.4644, "step": 774 }, { "ETA": 4.46, "epoch": 0.24923621160958354, "fp16_scale": 1.0, "global_step": 775, "grad_norm": 1.8151348544203338, "learning_rate": 1.7586267417035514e-06, "loss": 0.4389, "step": 775 }, { "ETA": 4.46, "epoch": 0.24955780672133784, "fp16_scale": 1.0, "global_step": 776, "grad_norm": 2.000002819432597, "learning_rate": 1.7579474484990855e-06, "loss": 0.4121, "step": 776 }, { "ETA": 4.46, "epoch": 0.24987940183309212, "fp16_scale": 1.0, "global_step": 777, "grad_norm": 2.175264685351054, "learning_rate": 1.757267332362206e-06, "loss": 0.4345, "step": 777 }, { "ETA": 4.45, "epoch": 0.25020099694484643, "fp16_scale": 1.0, "global_step": 778, "grad_norm": 2.2492227906871243, "learning_rate": 1.7565863940313413e-06, "loss": 0.4726, "step": 778 }, { "ETA": 4.45, "epoch": 0.2505225920566007, "fp16_scale": 1.0, "global_step": 779, "grad_norm": 2.035071396505975, "learning_rate": 1.755904634245812e-06, "loss": 0.3934, "step": 779 }, { "ETA": 4.45, "epoch": 0.25084418716835505, "fp16_scale": 1.0, "global_step": 780, "grad_norm": 1.8616710950931066, "learning_rate": 1.7552220537458305e-06, "loss": 0.3698, "step": 780 }, { "ETA": 4.45, "epoch": 0.25116578228010933, "fp16_scale": 1.0, "global_step": 781, "grad_norm": 1.923068394286512, "learning_rate": 1.7545386532725007e-06, "loss": 0.3803, "step": 781 }, { "ETA": 4.44, "epoch": 0.25148737739186366, "fp16_scale": 1.0, "global_step": 782, "grad_norm": 1.900807572854149, "learning_rate": 1.7538544335678162e-06, "loss": 0.5162, "step": 782 }, { "ETA": 4.44, "epoch": 0.25180897250361794, "fp16_scale": 1.0, "global_step": 783, "grad_norm": 2.0443781807318606, "learning_rate": 1.753169395374661e-06, "loss": 0.412, "step": 783 }, { "ETA": 4.44, "epoch": 0.2521305676153722, "fp16_scale": 1.0, "global_step": 784, "grad_norm": 2.1369422399704097, "learning_rate": 1.7524835394368065e-06, "loss": 0.5262, "step": 784 }, { "ETA": 4.44, "epoch": 0.25245216272712656, "fp16_scale": 1.0, "global_step": 785, "grad_norm": 2.0901290992793378, "learning_rate": 1.751796866498913e-06, "loss": 0.4367, "step": 785 }, { "ETA": 4.43, "epoch": 0.25277375783888084, "fp16_scale": 1.0, "global_step": 786, "grad_norm": 1.9588990538395974, "learning_rate": 1.7511093773065273e-06, "loss": 0.4255, "step": 786 }, { "ETA": 4.43, "epoch": 0.2530953529506352, "fp16_scale": 1.0, "global_step": 787, "grad_norm": 2.3157784000612227, "learning_rate": 1.7504210726060826e-06, "loss": 0.4295, "step": 787 }, { "ETA": 4.43, "epoch": 0.25341694806238946, "fp16_scale": 1.0, "global_step": 788, "grad_norm": 1.7158073363100894, "learning_rate": 1.7497319531448976e-06, "loss": 0.443, "step": 788 }, { "ETA": 4.43, "epoch": 0.25373854317414374, "fp16_scale": 1.0, "global_step": 789, "grad_norm": 2.2600046462658505, "learning_rate": 1.7490420196711755e-06, "loss": 0.4934, "step": 789 }, { "ETA": 4.43, "epoch": 0.2540601382858981, "fp16_scale": 1.0, "global_step": 790, "grad_norm": 1.7129405170763643, "learning_rate": 1.7483512729340032e-06, "loss": 0.4376, "step": 790 }, { "ETA": 4.43, "epoch": 0.25438173339765235, "fp16_scale": 1.0, "global_step": 791, "grad_norm": 1.9897989562336427, "learning_rate": 1.7476597136833511e-06, "loss": 0.4754, "step": 791 }, { "ETA": 4.42, "epoch": 0.25470332850940663, "fp16_scale": 1.0, "global_step": 792, "grad_norm": 1.8201510096470175, "learning_rate": 1.7469673426700713e-06, "loss": 0.4198, "step": 792 }, { "ETA": 4.42, "epoch": 0.25502492362116097, "fp16_scale": 1.0, "global_step": 793, "grad_norm": 1.7981216800446684, "learning_rate": 1.7462741606458973e-06, "loss": 0.3971, "step": 793 }, { "ETA": 4.42, "epoch": 0.25534651873291525, "fp16_scale": 1.0, "global_step": 794, "grad_norm": 1.9620634932865766, "learning_rate": 1.7455801683634431e-06, "loss": 0.4493, "step": 794 }, { "ETA": 4.42, "epoch": 0.2556681138446696, "fp16_scale": 1.0, "global_step": 795, "grad_norm": 1.998101638799515, "learning_rate": 1.7448853665762027e-06, "loss": 0.4601, "step": 795 }, { "ETA": 4.42, "epoch": 0.25598970895642387, "fp16_scale": 1.0, "global_step": 796, "grad_norm": 2.1496093269738283, "learning_rate": 1.744189756038549e-06, "loss": 0.4203, "step": 796 }, { "ETA": 4.42, "epoch": 0.25631130406817815, "fp16_scale": 1.0, "global_step": 797, "grad_norm": 1.787968047373974, "learning_rate": 1.7434933375057327e-06, "loss": 0.4502, "step": 797 }, { "ETA": 4.42, "epoch": 0.2566328991799325, "fp16_scale": 1.0, "global_step": 798, "grad_norm": 1.7911742148217042, "learning_rate": 1.7427961117338817e-06, "loss": 0.4849, "step": 798 }, { "ETA": 4.41, "epoch": 0.25695449429168676, "fp16_scale": 1.0, "global_step": 799, "grad_norm": 1.9238691149579596, "learning_rate": 1.742098079480001e-06, "loss": 0.4768, "step": 799 }, { "ETA": 4.41, "epoch": 0.25727608940344104, "fp16_scale": 1.0, "global_step": 800, "grad_norm": 2.194856263523492, "learning_rate": 1.7413992415019704e-06, "loss": 0.4083, "step": 800 }, { "ETA": 4.43, "epoch": 0.2575976845151954, "fp16_scale": 1.0, "global_step": 801, "grad_norm": 2.2135807781089194, "learning_rate": 1.7406995985585453e-06, "loss": 0.4369, "step": 801 }, { "ETA": 4.43, "epoch": 0.25791927962694966, "fp16_scale": 1.0, "global_step": 802, "grad_norm": 1.9396102788499803, "learning_rate": 1.7399991514093546e-06, "loss": 0.4444, "step": 802 }, { "ETA": 4.43, "epoch": 0.258240874738704, "fp16_scale": 1.0, "global_step": 803, "grad_norm": 1.9528208225357846, "learning_rate": 1.7392979008149e-06, "loss": 0.4459, "step": 803 }, { "ETA": 4.43, "epoch": 0.2585624698504583, "fp16_scale": 1.0, "global_step": 804, "grad_norm": 2.0034358076573864, "learning_rate": 1.7385958475365569e-06, "loss": 0.4678, "step": 804 }, { "ETA": 4.43, "epoch": 0.25888406496221256, "fp16_scale": 1.0, "global_step": 805, "grad_norm": 2.1333845100844457, "learning_rate": 1.7378929923365703e-06, "loss": 0.45, "step": 805 }, { "ETA": 4.42, "epoch": 0.2592056600739669, "fp16_scale": 1.0, "global_step": 806, "grad_norm": 2.0507099534205753, "learning_rate": 1.7371893359780573e-06, "loss": 0.5479, "step": 806 }, { "ETA": 4.42, "epoch": 0.2595272551857212, "fp16_scale": 1.0, "global_step": 807, "grad_norm": 1.9725026084076829, "learning_rate": 1.7364848792250047e-06, "loss": 0.4651, "step": 807 }, { "ETA": 4.42, "epoch": 0.25984885029747545, "fp16_scale": 1.0, "global_step": 808, "grad_norm": 2.2343844990472124, "learning_rate": 1.7357796228422675e-06, "loss": 0.3991, "step": 808 }, { "ETA": 4.42, "epoch": 0.2601704454092298, "fp16_scale": 1.0, "global_step": 809, "grad_norm": 1.9731393977209701, "learning_rate": 1.7350735675955695e-06, "loss": 0.4105, "step": 809 }, { "ETA": 4.42, "epoch": 0.26049204052098407, "fp16_scale": 1.0, "global_step": 810, "grad_norm": 1.9473285535913734, "learning_rate": 1.7343667142515021e-06, "loss": 0.4022, "step": 810 }, { "ETA": 4.41, "epoch": 0.2608136356327384, "fp16_scale": 1.0, "global_step": 811, "grad_norm": 2.2285101547456128, "learning_rate": 1.7336590635775228e-06, "loss": 0.4212, "step": 811 }, { "ETA": 4.41, "epoch": 0.2611352307444927, "fp16_scale": 1.0, "global_step": 812, "grad_norm": 2.0016062409752933, "learning_rate": 1.7329506163419546e-06, "loss": 0.3362, "step": 812 }, { "ETA": 4.4, "epoch": 0.26145682585624697, "fp16_scale": 1.0, "global_step": 813, "grad_norm": 1.8228175063218146, "learning_rate": 1.732241373313986e-06, "loss": 0.4284, "step": 813 }, { "ETA": 4.4, "epoch": 0.2617784209680013, "fp16_scale": 1.0, "global_step": 814, "grad_norm": 1.7879482618101108, "learning_rate": 1.731531335263669e-06, "loss": 0.5041, "step": 814 }, { "ETA": 4.4, "epoch": 0.2621000160797556, "fp16_scale": 1.0, "global_step": 815, "grad_norm": 1.7973268099244846, "learning_rate": 1.7308205029619186e-06, "loss": 0.4017, "step": 815 }, { "ETA": 4.4, "epoch": 0.26242161119150986, "fp16_scale": 1.0, "global_step": 816, "grad_norm": 2.0170356461538064, "learning_rate": 1.7301088771805134e-06, "loss": 0.3851, "step": 816 }, { "ETA": 4.39, "epoch": 0.2627432063032642, "fp16_scale": 1.0, "global_step": 817, "grad_norm": 1.962065648722113, "learning_rate": 1.729396458692092e-06, "loss": 0.3713, "step": 817 }, { "ETA": 4.39, "epoch": 0.2630648014150185, "fp16_scale": 1.0, "global_step": 818, "grad_norm": 2.1056835285183833, "learning_rate": 1.728683248270154e-06, "loss": 0.5194, "step": 818 }, { "ETA": 4.39, "epoch": 0.2633863965267728, "fp16_scale": 1.0, "global_step": 819, "grad_norm": 2.0273393563710864, "learning_rate": 1.72796924668906e-06, "loss": 0.3721, "step": 819 }, { "ETA": 4.38, "epoch": 0.2637079916385271, "fp16_scale": 1.0, "global_step": 820, "grad_norm": 1.9509036703352076, "learning_rate": 1.727254454724028e-06, "loss": 0.349, "step": 820 }, { "ETA": 4.38, "epoch": 0.2640295867502814, "fp16_scale": 1.0, "global_step": 821, "grad_norm": 2.025537968078176, "learning_rate": 1.726538873151135e-06, "loss": 0.3888, "step": 821 }, { "ETA": 4.38, "epoch": 0.2643511818620357, "fp16_scale": 1.0, "global_step": 822, "grad_norm": 1.9390468488956132, "learning_rate": 1.7258225027473153e-06, "loss": 0.4164, "step": 822 }, { "ETA": 4.38, "epoch": 0.26467277697379, "fp16_scale": 1.0, "global_step": 823, "grad_norm": 1.927968786601008, "learning_rate": 1.7251053442903594e-06, "loss": 0.4315, "step": 823 }, { "ETA": 4.38, "epoch": 0.2649943720855443, "fp16_scale": 1.0, "global_step": 824, "grad_norm": 2.144994559077059, "learning_rate": 1.7243873985589134e-06, "loss": 0.4794, "step": 824 }, { "ETA": 4.37, "epoch": 0.2653159671972986, "fp16_scale": 1.0, "global_step": 825, "grad_norm": 2.2808454226695085, "learning_rate": 1.723668666332479e-06, "loss": 0.4492, "step": 825 }, { "ETA": 4.37, "epoch": 0.2656375623090529, "fp16_scale": 1.0, "global_step": 826, "grad_norm": 1.9193515378791834, "learning_rate": 1.7229491483914106e-06, "loss": 0.4437, "step": 826 }, { "ETA": 4.37, "epoch": 0.2659591574208072, "fp16_scale": 1.0, "global_step": 827, "grad_norm": 2.012237408269288, "learning_rate": 1.7222288455169162e-06, "loss": 0.4653, "step": 827 }, { "ETA": 4.37, "epoch": 0.2662807525325615, "fp16_scale": 1.0, "global_step": 828, "grad_norm": 1.9147271412988327, "learning_rate": 1.7215077584910563e-06, "loss": 0.4144, "step": 828 }, { "ETA": 4.37, "epoch": 0.2666023476443158, "fp16_scale": 1.0, "global_step": 829, "grad_norm": 1.9663151726569172, "learning_rate": 1.7207858880967425e-06, "loss": 0.4917, "step": 829 }, { "ETA": 4.37, "epoch": 0.2669239427560701, "fp16_scale": 1.0, "global_step": 830, "grad_norm": 2.2663738245216725, "learning_rate": 1.7200632351177367e-06, "loss": 0.4011, "step": 830 }, { "ETA": 4.36, "epoch": 0.2672455378678244, "fp16_scale": 1.0, "global_step": 831, "grad_norm": 2.0313714161063405, "learning_rate": 1.719339800338651e-06, "loss": 0.4241, "step": 831 }, { "ETA": 4.36, "epoch": 0.26756713297957874, "fp16_scale": 1.0, "global_step": 832, "grad_norm": 1.9746138792173442, "learning_rate": 1.7186155845449464e-06, "loss": 0.5026, "step": 832 }, { "ETA": 4.36, "epoch": 0.267888728091333, "fp16_scale": 1.0, "global_step": 833, "grad_norm": 2.1934796616910988, "learning_rate": 1.7178905885229309e-06, "loss": 0.573, "step": 833 }, { "ETA": 4.36, "epoch": 0.2682103232030873, "fp16_scale": 1.0, "global_step": 834, "grad_norm": 1.9015089104423855, "learning_rate": 1.717164813059761e-06, "loss": 0.4096, "step": 834 }, { "ETA": 4.36, "epoch": 0.26853191831484163, "fp16_scale": 1.0, "global_step": 835, "grad_norm": 2.047960992129641, "learning_rate": 1.716438258943438e-06, "loss": 0.3958, "step": 835 }, { "ETA": 4.36, "epoch": 0.2688535134265959, "fp16_scale": 1.0, "global_step": 836, "grad_norm": 2.0625477287596663, "learning_rate": 1.71571092696281e-06, "loss": 0.4992, "step": 836 }, { "ETA": 4.35, "epoch": 0.2691751085383502, "fp16_scale": 1.0, "global_step": 837, "grad_norm": 2.0557599608220283, "learning_rate": 1.714982817907569e-06, "loss": 0.4152, "step": 837 }, { "ETA": 4.35, "epoch": 0.26949670365010453, "fp16_scale": 1.0, "global_step": 838, "grad_norm": 2.2111570974142913, "learning_rate": 1.7142539325682503e-06, "loss": 0.4189, "step": 838 }, { "ETA": 4.35, "epoch": 0.2698182987618588, "fp16_scale": 1.0, "global_step": 839, "grad_norm": 2.0871791563213917, "learning_rate": 1.7135242717362328e-06, "loss": 0.3642, "step": 839 }, { "ETA": 4.34, "epoch": 0.27013989387361315, "fp16_scale": 1.0, "global_step": 840, "grad_norm": 2.13373016968836, "learning_rate": 1.7127938362037373e-06, "loss": 0.4134, "step": 840 }, { "ETA": 4.34, "epoch": 0.2704614889853674, "fp16_scale": 1.0, "global_step": 841, "grad_norm": 2.250770840572516, "learning_rate": 1.7120626267638247e-06, "loss": 0.3658, "step": 841 }, { "ETA": 4.33, "epoch": 0.2707830840971217, "fp16_scale": 1.0, "global_step": 842, "grad_norm": 1.9239987500787747, "learning_rate": 1.7113306442103977e-06, "loss": 0.4717, "step": 842 }, { "ETA": 4.33, "epoch": 0.27110467920887604, "fp16_scale": 1.0, "global_step": 843, "grad_norm": 1.9123381993159572, "learning_rate": 1.7105978893381972e-06, "loss": 0.4548, "step": 843 }, { "ETA": 4.33, "epoch": 0.2714262743206303, "fp16_scale": 1.0, "global_step": 844, "grad_norm": 1.9111183798624058, "learning_rate": 1.7098643629428034e-06, "loss": 0.4728, "step": 844 }, { "ETA": 4.33, "epoch": 0.2717478694323846, "fp16_scale": 1.0, "global_step": 845, "grad_norm": 2.008869802468323, "learning_rate": 1.7091300658206334e-06, "loss": 0.4089, "step": 845 }, { "ETA": 4.33, "epoch": 0.27206946454413894, "fp16_scale": 1.0, "global_step": 846, "grad_norm": 1.9139533175385044, "learning_rate": 1.708394998768942e-06, "loss": 0.4335, "step": 846 }, { "ETA": 4.33, "epoch": 0.2723910596558932, "fp16_scale": 1.0, "global_step": 847, "grad_norm": 1.9587937440179755, "learning_rate": 1.707659162585819e-06, "loss": 0.4303, "step": 847 }, { "ETA": 4.32, "epoch": 0.27271265476764756, "fp16_scale": 1.0, "global_step": 848, "grad_norm": 2.1733534262363423, "learning_rate": 1.7069225580701904e-06, "loss": 0.4729, "step": 848 }, { "ETA": 4.32, "epoch": 0.27303424987940184, "fp16_scale": 1.0, "global_step": 849, "grad_norm": 1.88692041884661, "learning_rate": 1.7061851860218152e-06, "loss": 0.3859, "step": 849 }, { "ETA": 4.32, "epoch": 0.2733558449911561, "fp16_scale": 1.0, "global_step": 850, "grad_norm": 1.8429844593171765, "learning_rate": 1.7054470472412872e-06, "loss": 0.3967, "step": 850 }, { "ETA": 4.32, "epoch": 0.27367744010291045, "fp16_scale": 1.0, "global_step": 851, "grad_norm": 1.8390089168268084, "learning_rate": 1.7047081425300307e-06, "loss": 0.4034, "step": 851 }, { "ETA": 4.32, "epoch": 0.27399903521466473, "fp16_scale": 1.0, "global_step": 852, "grad_norm": 1.95375035549875, "learning_rate": 1.703968472690303e-06, "loss": 0.5002, "step": 852 }, { "ETA": 4.32, "epoch": 0.274320630326419, "fp16_scale": 1.0, "global_step": 853, "grad_norm": 2.062375827886167, "learning_rate": 1.7032280385251923e-06, "loss": 0.5042, "step": 853 }, { "ETA": 4.32, "epoch": 0.27464222543817335, "fp16_scale": 1.0, "global_step": 854, "grad_norm": 2.0272940296944686, "learning_rate": 1.7024868408386157e-06, "loss": 0.4469, "step": 854 }, { "ETA": 4.32, "epoch": 0.27496382054992763, "fp16_scale": 1.0, "global_step": 855, "grad_norm": 1.914313565079873, "learning_rate": 1.70174488043532e-06, "loss": 0.43, "step": 855 }, { "ETA": 4.31, "epoch": 0.27528541566168196, "fp16_scale": 1.0, "global_step": 856, "grad_norm": 2.0827423924599904, "learning_rate": 1.7010021581208797e-06, "loss": 0.4696, "step": 856 }, { "ETA": 4.31, "epoch": 0.27560701077343625, "fp16_scale": 1.0, "global_step": 857, "grad_norm": 2.034557993310201, "learning_rate": 1.7002586747016968e-06, "loss": 0.4108, "step": 857 }, { "ETA": 4.3, "epoch": 0.2759286058851905, "fp16_scale": 1.0, "global_step": 858, "grad_norm": 1.9798550191722615, "learning_rate": 1.6995144309849994e-06, "loss": 0.3915, "step": 858 }, { "ETA": 4.3, "epoch": 0.27625020099694486, "fp16_scale": 1.0, "global_step": 859, "grad_norm": 1.9784747074833027, "learning_rate": 1.6987694277788416e-06, "loss": 0.4445, "step": 859 }, { "ETA": 4.3, "epoch": 0.27657179610869914, "fp16_scale": 1.0, "global_step": 860, "grad_norm": 1.9162443927414687, "learning_rate": 1.6980236658921015e-06, "loss": 0.4557, "step": 860 }, { "ETA": 4.3, "epoch": 0.2768933912204534, "fp16_scale": 1.0, "global_step": 861, "grad_norm": 2.225603044902912, "learning_rate": 1.6972771461344812e-06, "loss": 0.417, "step": 861 }, { "ETA": 4.29, "epoch": 0.27721498633220776, "fp16_scale": 1.0, "global_step": 862, "grad_norm": 1.8177365253866995, "learning_rate": 1.6965298693165057e-06, "loss": 0.426, "step": 862 }, { "ETA": 4.29, "epoch": 0.27753658144396204, "fp16_scale": 1.0, "global_step": 863, "grad_norm": 1.9528751550469554, "learning_rate": 1.6957818362495218e-06, "loss": 0.4988, "step": 863 }, { "ETA": 4.29, "epoch": 0.2778581765557164, "fp16_scale": 1.0, "global_step": 864, "grad_norm": 1.9734214739855664, "learning_rate": 1.6950330477456975e-06, "loss": 0.462, "step": 864 }, { "ETA": 4.29, "epoch": 0.27817977166747065, "fp16_scale": 1.0, "global_step": 865, "grad_norm": 2.048578997590825, "learning_rate": 1.6942835046180214e-06, "loss": 0.4576, "step": 865 }, { "ETA": 4.29, "epoch": 0.27850136677922493, "fp16_scale": 1.0, "global_step": 866, "grad_norm": 2.0365831905860485, "learning_rate": 1.6935332076803005e-06, "loss": 0.4645, "step": 866 }, { "ETA": 4.29, "epoch": 0.27882296189097927, "fp16_scale": 1.0, "global_step": 867, "grad_norm": 2.3348995559649004, "learning_rate": 1.6927821577471609e-06, "loss": 0.4275, "step": 867 }, { "ETA": 4.29, "epoch": 0.27914455700273355, "fp16_scale": 1.0, "global_step": 868, "grad_norm": 1.9424993100231185, "learning_rate": 1.692030355634046e-06, "loss": 0.462, "step": 868 }, { "ETA": 4.28, "epoch": 0.2794661521144879, "fp16_scale": 1.0, "global_step": 869, "grad_norm": 2.103025002987846, "learning_rate": 1.6912778021572165e-06, "loss": 0.4325, "step": 869 }, { "ETA": 4.28, "epoch": 0.27978774722624217, "fp16_scale": 1.0, "global_step": 870, "grad_norm": 2.0322857291386516, "learning_rate": 1.6905244981337479e-06, "loss": 0.4719, "step": 870 }, { "ETA": 4.28, "epoch": 0.28010934233799645, "fp16_scale": 1.0, "global_step": 871, "grad_norm": 2.2323938212614096, "learning_rate": 1.689770444381531e-06, "loss": 0.3803, "step": 871 }, { "ETA": 4.28, "epoch": 0.2804309374497508, "fp16_scale": 1.0, "global_step": 872, "grad_norm": 2.1196793658788686, "learning_rate": 1.689015641719271e-06, "loss": 0.4481, "step": 872 }, { "ETA": 4.28, "epoch": 0.28075253256150506, "fp16_scale": 1.0, "global_step": 873, "grad_norm": 2.0436425268766296, "learning_rate": 1.688260090966486e-06, "loss": 0.5382, "step": 873 }, { "ETA": 4.27, "epoch": 0.28107412767325934, "fp16_scale": 1.0, "global_step": 874, "grad_norm": 2.114287881621631, "learning_rate": 1.687503792943506e-06, "loss": 0.3502, "step": 874 }, { "ETA": 4.27, "epoch": 0.2813957227850137, "fp16_scale": 1.0, "global_step": 875, "grad_norm": 2.048699928774643, "learning_rate": 1.6867467484714721e-06, "loss": 0.4626, "step": 875 }, { "ETA": 4.27, "epoch": 0.28171731789676796, "fp16_scale": 1.0, "global_step": 876, "grad_norm": 1.9385321275469745, "learning_rate": 1.6859889583723373e-06, "loss": 0.4314, "step": 876 }, { "ETA": 4.27, "epoch": 0.2820389130085223, "fp16_scale": 1.0, "global_step": 877, "grad_norm": 1.7902775204514527, "learning_rate": 1.6852304234688623e-06, "loss": 0.4857, "step": 877 }, { "ETA": 4.26, "epoch": 0.2823605081202766, "fp16_scale": 1.0, "global_step": 878, "grad_norm": 1.9188851498442143, "learning_rate": 1.6844711445846178e-06, "loss": 0.387, "step": 878 }, { "ETA": 4.26, "epoch": 0.28268210323203086, "fp16_scale": 1.0, "global_step": 879, "grad_norm": 1.8294737345080043, "learning_rate": 1.683711122543982e-06, "loss": 0.4349, "step": 879 }, { "ETA": 4.26, "epoch": 0.2830036983437852, "fp16_scale": 1.0, "global_step": 880, "grad_norm": 1.8871558500356846, "learning_rate": 1.6829503581721393e-06, "loss": 0.4346, "step": 880 }, { "ETA": 4.26, "epoch": 0.2833252934555395, "fp16_scale": 1.0, "global_step": 881, "grad_norm": 1.8862954619001415, "learning_rate": 1.6821888522950806e-06, "loss": 0.4941, "step": 881 }, { "ETA": 4.25, "epoch": 0.28364688856729375, "fp16_scale": 1.0, "global_step": 882, "grad_norm": 2.309636485905092, "learning_rate": 1.681426605739602e-06, "loss": 0.388, "step": 882 }, { "ETA": 4.25, "epoch": 0.2839684836790481, "fp16_scale": 1.0, "global_step": 883, "grad_norm": 2.0456379778426883, "learning_rate": 1.6806636193333038e-06, "loss": 0.4262, "step": 883 }, { "ETA": 4.25, "epoch": 0.28429007879080237, "fp16_scale": 1.0, "global_step": 884, "grad_norm": 1.9450465927363632, "learning_rate": 1.6798998939045892e-06, "loss": 0.4945, "step": 884 }, { "ETA": 4.25, "epoch": 0.2846116739025567, "fp16_scale": 1.0, "global_step": 885, "grad_norm": 1.9607234693559452, "learning_rate": 1.6791354302826637e-06, "loss": 0.3906, "step": 885 }, { "ETA": 4.25, "epoch": 0.284933269014311, "fp16_scale": 1.0, "global_step": 886, "grad_norm": 2.0309419851069057, "learning_rate": 1.6783702292975347e-06, "loss": 0.4137, "step": 886 }, { "ETA": 4.24, "epoch": 0.28525486412606527, "fp16_scale": 1.0, "global_step": 887, "grad_norm": 2.0088058356002683, "learning_rate": 1.6776042917800107e-06, "loss": 0.4294, "step": 887 }, { "ETA": 4.24, "epoch": 0.2855764592378196, "fp16_scale": 1.0, "global_step": 888, "grad_norm": 1.869920815793303, "learning_rate": 1.6768376185616983e-06, "loss": 0.3826, "step": 888 }, { "ETA": 4.24, "epoch": 0.2858980543495739, "fp16_scale": 1.0, "global_step": 889, "grad_norm": 2.08059274751399, "learning_rate": 1.6760702104750045e-06, "loss": 0.4728, "step": 889 }, { "ETA": 4.24, "epoch": 0.28621964946132816, "fp16_scale": 1.0, "global_step": 890, "grad_norm": 2.219387716824284, "learning_rate": 1.675302068353133e-06, "loss": 0.4536, "step": 890 }, { "ETA": 4.24, "epoch": 0.2865412445730825, "fp16_scale": 1.0, "global_step": 891, "grad_norm": 2.057716974572081, "learning_rate": 1.6745331930300856e-06, "loss": 0.3842, "step": 891 }, { "ETA": 4.24, "epoch": 0.2868628396848368, "fp16_scale": 1.0, "global_step": 892, "grad_norm": 1.815613753259894, "learning_rate": 1.6737635853406592e-06, "loss": 0.4699, "step": 892 }, { "ETA": 4.23, "epoch": 0.2871844347965911, "fp16_scale": 1.0, "global_step": 893, "grad_norm": 1.9216240038533512, "learning_rate": 1.6729932461204455e-06, "loss": 0.4485, "step": 893 }, { "ETA": 4.23, "epoch": 0.2875060299083454, "fp16_scale": 1.0, "global_step": 894, "grad_norm": 1.8816264379862395, "learning_rate": 1.6722221762058322e-06, "loss": 0.4268, "step": 894 }, { "ETA": 4.23, "epoch": 0.2878276250200997, "fp16_scale": 1.0, "global_step": 895, "grad_norm": 2.237772525520464, "learning_rate": 1.6714503764339985e-06, "loss": 0.4008, "step": 895 }, { "ETA": 4.23, "epoch": 0.288149220131854, "fp16_scale": 1.0, "global_step": 896, "grad_norm": 2.2021509983138436, "learning_rate": 1.6706778476429174e-06, "loss": 0.3638, "step": 896 }, { "ETA": 4.22, "epoch": 0.2884708152436083, "fp16_scale": 1.0, "global_step": 897, "grad_norm": 2.3140625427022163, "learning_rate": 1.6699045906713524e-06, "loss": 0.4576, "step": 897 }, { "ETA": 4.22, "epoch": 0.28879241035536257, "fp16_scale": 1.0, "global_step": 898, "grad_norm": 1.9141845352200815, "learning_rate": 1.669130606358858e-06, "loss": 0.4727, "step": 898 }, { "ETA": 4.22, "epoch": 0.2891140054671169, "fp16_scale": 1.0, "global_step": 899, "grad_norm": 1.8455199467528947, "learning_rate": 1.668355895545779e-06, "loss": 0.526, "step": 899 }, { "ETA": 4.22, "epoch": 0.2894356005788712, "fp16_scale": 1.0, "global_step": 900, "grad_norm": 1.9324765236546506, "learning_rate": 1.6675804590732479e-06, "loss": 0.4558, "step": 900 }, { "ETA": 4.22, "epoch": 0.2897571956906255, "fp16_scale": 1.0, "global_step": 901, "grad_norm": 2.0324500329401682, "learning_rate": 1.666804297783186e-06, "loss": 0.3735, "step": 901 }, { "ETA": 4.21, "epoch": 0.2900787908023798, "fp16_scale": 1.0, "global_step": 902, "grad_norm": 2.2331769620950586, "learning_rate": 1.6660274125183007e-06, "loss": 0.432, "step": 902 }, { "ETA": 4.21, "epoch": 0.2904003859141341, "fp16_scale": 1.0, "global_step": 903, "grad_norm": 1.9605472275641855, "learning_rate": 1.6652498041220864e-06, "loss": 0.4608, "step": 903 }, { "ETA": 4.21, "epoch": 0.2907219810258884, "fp16_scale": 1.0, "global_step": 904, "grad_norm": 1.9836667106071602, "learning_rate": 1.6644714734388216e-06, "loss": 0.37, "step": 904 }, { "ETA": 4.21, "epoch": 0.2910435761376427, "fp16_scale": 1.0, "global_step": 905, "grad_norm": 2.0883224813836896, "learning_rate": 1.6636924213135703e-06, "loss": 0.4197, "step": 905 }, { "ETA": 4.21, "epoch": 0.291365171249397, "fp16_scale": 1.0, "global_step": 906, "grad_norm": 1.8957073666424162, "learning_rate": 1.6629126485921784e-06, "loss": 0.4679, "step": 906 }, { "ETA": 4.2, "epoch": 0.2916867663611513, "fp16_scale": 1.0, "global_step": 907, "grad_norm": 2.111429723636016, "learning_rate": 1.6621321561212751e-06, "loss": 0.4214, "step": 907 }, { "ETA": 4.2, "epoch": 0.2920083614729056, "fp16_scale": 1.0, "global_step": 908, "grad_norm": 2.0575896823495277, "learning_rate": 1.6613509447482713e-06, "loss": 0.4531, "step": 908 }, { "ETA": 4.2, "epoch": 0.29232995658465993, "fp16_scale": 1.0, "global_step": 909, "grad_norm": 1.9722130872935568, "learning_rate": 1.6605690153213569e-06, "loss": 0.4384, "step": 909 }, { "ETA": 4.2, "epoch": 0.2926515516964142, "fp16_scale": 1.0, "global_step": 910, "grad_norm": 1.8297545060113751, "learning_rate": 1.6597863686895033e-06, "loss": 0.3648, "step": 910 }, { "ETA": 4.19, "epoch": 0.2929731468081685, "fp16_scale": 1.0, "global_step": 911, "grad_norm": 1.8092968598163315, "learning_rate": 1.6590030057024594e-06, "loss": 0.383, "step": 911 }, { "ETA": 4.19, "epoch": 0.29329474191992283, "fp16_scale": 1.0, "global_step": 912, "grad_norm": 2.0699529416823705, "learning_rate": 1.6582189272107524e-06, "loss": 0.4354, "step": 912 }, { "ETA": 4.19, "epoch": 0.2936163370316771, "fp16_scale": 1.0, "global_step": 913, "grad_norm": 2.1270871985867372, "learning_rate": 1.6574341340656859e-06, "loss": 0.4982, "step": 913 }, { "ETA": 4.19, "epoch": 0.29393793214343145, "fp16_scale": 1.0, "global_step": 914, "grad_norm": 1.7365809088090376, "learning_rate": 1.65664862711934e-06, "loss": 0.4122, "step": 914 }, { "ETA": 4.19, "epoch": 0.2942595272551857, "fp16_scale": 1.0, "global_step": 915, "grad_norm": 2.2053078988627832, "learning_rate": 1.6558624072245694e-06, "loss": 0.414, "step": 915 }, { "ETA": 4.19, "epoch": 0.29458112236694, "fp16_scale": 1.0, "global_step": 916, "grad_norm": 1.9402797847729918, "learning_rate": 1.6550754752350029e-06, "loss": 0.5087, "step": 916 }, { "ETA": 4.18, "epoch": 0.29490271747869434, "fp16_scale": 1.0, "global_step": 917, "grad_norm": 2.0814519151266078, "learning_rate": 1.654287832005043e-06, "loss": 0.4751, "step": 917 }, { "ETA": 4.18, "epoch": 0.2952243125904486, "fp16_scale": 1.0, "global_step": 918, "grad_norm": 2.027778234865114, "learning_rate": 1.6534994783898632e-06, "loss": 0.532, "step": 918 }, { "ETA": 4.18, "epoch": 0.2955459077022029, "fp16_scale": 1.0, "global_step": 919, "grad_norm": 2.0031415254223517, "learning_rate": 1.6527104152454094e-06, "loss": 0.4551, "step": 919 }, { "ETA": 4.18, "epoch": 0.29586750281395724, "fp16_scale": 1.0, "global_step": 920, "grad_norm": 1.8159943503802232, "learning_rate": 1.651920643428398e-06, "loss": 0.4119, "step": 920 }, { "ETA": 4.18, "epoch": 0.2961890979257115, "fp16_scale": 1.0, "global_step": 921, "grad_norm": 1.9489260671056785, "learning_rate": 1.6511301637963135e-06, "loss": 0.4342, "step": 921 }, { "ETA": 4.18, "epoch": 0.29651069303746586, "fp16_scale": 1.0, "global_step": 922, "grad_norm": 2.3098551375258363, "learning_rate": 1.6503389772074101e-06, "loss": 0.4943, "step": 922 }, { "ETA": 4.18, "epoch": 0.29683228814922014, "fp16_scale": 1.0, "global_step": 923, "grad_norm": 1.788580790300466, "learning_rate": 1.6495470845207096e-06, "loss": 0.4268, "step": 923 }, { "ETA": 4.17, "epoch": 0.2971538832609744, "fp16_scale": 1.0, "global_step": 924, "grad_norm": 2.2036099523986175, "learning_rate": 1.6487544865959993e-06, "loss": 0.454, "step": 924 }, { "ETA": 4.17, "epoch": 0.29747547837272875, "fp16_scale": 1.0, "global_step": 925, "grad_norm": 2.119978543320096, "learning_rate": 1.6479611842938336e-06, "loss": 0.4539, "step": 925 }, { "ETA": 4.17, "epoch": 0.29779707348448303, "fp16_scale": 1.0, "global_step": 926, "grad_norm": 1.9519790961903443, "learning_rate": 1.6471671784755307e-06, "loss": 0.4717, "step": 926 }, { "ETA": 4.17, "epoch": 0.2981186685962373, "fp16_scale": 1.0, "global_step": 927, "grad_norm": 2.09850957875013, "learning_rate": 1.6463724700031729e-06, "loss": 0.4486, "step": 927 }, { "ETA": 4.17, "epoch": 0.29844026370799165, "fp16_scale": 1.0, "global_step": 928, "grad_norm": 1.967319260081159, "learning_rate": 1.6455770597396057e-06, "loss": 0.3927, "step": 928 }, { "ETA": 4.17, "epoch": 0.29876185881974593, "fp16_scale": 1.0, "global_step": 929, "grad_norm": 1.9256768270796039, "learning_rate": 1.644780948548436e-06, "loss": 0.4866, "step": 929 }, { "ETA": 4.17, "epoch": 0.29908345393150026, "fp16_scale": 1.0, "global_step": 930, "grad_norm": 2.8211455565522945, "learning_rate": 1.6439841372940327e-06, "loss": 0.4714, "step": 930 }, { "ETA": 4.16, "epoch": 0.29940504904325455, "fp16_scale": 1.0, "global_step": 931, "grad_norm": 1.9215018024476105, "learning_rate": 1.6431866268415236e-06, "loss": 0.4105, "step": 931 }, { "ETA": 4.16, "epoch": 0.2997266441550088, "fp16_scale": 1.0, "global_step": 932, "grad_norm": 2.274164232899391, "learning_rate": 1.6423884180567957e-06, "loss": 0.505, "step": 932 }, { "ETA": 4.16, "epoch": 0.30004823926676316, "fp16_scale": 1.0, "global_step": 933, "grad_norm": 2.29083762275944, "learning_rate": 1.6415895118064957e-06, "loss": 0.4023, "step": 933 }, { "ETA": 4.16, "epoch": 0.30036983437851744, "fp16_scale": 1.0, "global_step": 934, "grad_norm": 1.9240621370254039, "learning_rate": 1.6407899089580259e-06, "loss": 0.4576, "step": 934 }, { "ETA": 4.16, "epoch": 0.3006914294902717, "fp16_scale": 1.0, "global_step": 935, "grad_norm": 2.0423004097962707, "learning_rate": 1.639989610379546e-06, "loss": 0.4809, "step": 935 }, { "ETA": 4.16, "epoch": 0.30101302460202606, "fp16_scale": 1.0, "global_step": 936, "grad_norm": 2.1610394320106567, "learning_rate": 1.63918861693997e-06, "loss": 0.5741, "step": 936 }, { "ETA": 4.15, "epoch": 0.30133461971378034, "fp16_scale": 1.0, "global_step": 937, "grad_norm": 1.8416511356810563, "learning_rate": 1.6383869295089679e-06, "loss": 0.4964, "step": 937 }, { "ETA": 4.15, "epoch": 0.3016562148255347, "fp16_scale": 1.0, "global_step": 938, "grad_norm": 2.009089661026409, "learning_rate": 1.6375845489569614e-06, "loss": 0.4623, "step": 938 }, { "ETA": 4.15, "epoch": 0.30197780993728895, "fp16_scale": 1.0, "global_step": 939, "grad_norm": 1.9471135785187048, "learning_rate": 1.636781476155126e-06, "loss": 0.4525, "step": 939 }, { "ETA": 4.15, "epoch": 0.30229940504904323, "fp16_scale": 1.0, "global_step": 940, "grad_norm": 1.9459217505997315, "learning_rate": 1.6359777119753883e-06, "loss": 0.4674, "step": 940 }, { "ETA": 4.15, "epoch": 0.30262100016079757, "fp16_scale": 1.0, "global_step": 941, "grad_norm": 1.9527092305549782, "learning_rate": 1.6351732572904257e-06, "loss": 0.448, "step": 941 }, { "ETA": 4.15, "epoch": 0.30294259527255185, "fp16_scale": 1.0, "global_step": 942, "grad_norm": 2.0542514425282605, "learning_rate": 1.6343681129736658e-06, "loss": 0.4661, "step": 942 }, { "ETA": 4.14, "epoch": 0.30326419038430613, "fp16_scale": 1.0, "global_step": 943, "grad_norm": 1.8713043915130485, "learning_rate": 1.6335622798992838e-06, "loss": 0.4104, "step": 943 }, { "ETA": 4.14, "epoch": 0.30358578549606047, "fp16_scale": 1.0, "global_step": 944, "grad_norm": 1.9127342115214452, "learning_rate": 1.6327557589422035e-06, "loss": 0.4048, "step": 944 }, { "ETA": 4.14, "epoch": 0.30390738060781475, "fp16_scale": 1.0, "global_step": 945, "grad_norm": 1.822000083393841, "learning_rate": 1.631948550978096e-06, "loss": 0.4179, "step": 945 }, { "ETA": 4.14, "epoch": 0.3042289757195691, "fp16_scale": 1.0, "global_step": 946, "grad_norm": 2.246307562871448, "learning_rate": 1.6311406568833768e-06, "loss": 0.4699, "step": 946 }, { "ETA": 4.14, "epoch": 0.30455057083132336, "fp16_scale": 1.0, "global_step": 947, "grad_norm": 1.7991526158996067, "learning_rate": 1.6303320775352076e-06, "loss": 0.3842, "step": 947 }, { "ETA": 4.14, "epoch": 0.30487216594307764, "fp16_scale": 1.0, "global_step": 948, "grad_norm": 2.130316177876793, "learning_rate": 1.6295228138114943e-06, "loss": 0.4786, "step": 948 }, { "ETA": 4.14, "epoch": 0.305193761054832, "fp16_scale": 1.0, "global_step": 949, "grad_norm": 1.936392005000023, "learning_rate": 1.628712866590885e-06, "loss": 0.3948, "step": 949 }, { "ETA": 4.13, "epoch": 0.30551535616658626, "fp16_scale": 1.0, "global_step": 950, "grad_norm": 1.867987765910443, "learning_rate": 1.62790223675277e-06, "loss": 0.4887, "step": 950 }, { "ETA": 4.13, "epoch": 0.3058369512783406, "fp16_scale": 1.0, "global_step": 951, "grad_norm": 2.093894916092837, "learning_rate": 1.6270909251772813e-06, "loss": 0.387, "step": 951 }, { "ETA": 4.13, "epoch": 0.3061585463900949, "fp16_scale": 1.0, "global_step": 952, "grad_norm": 1.863470738012386, "learning_rate": 1.6262789327452903e-06, "loss": 0.4035, "step": 952 }, { "ETA": 4.12, "epoch": 0.30648014150184916, "fp16_scale": 1.0, "global_step": 953, "grad_norm": 2.059524569754237, "learning_rate": 1.625466260338409e-06, "loss": 0.4444, "step": 953 }, { "ETA": 4.12, "epoch": 0.3068017366136035, "fp16_scale": 1.0, "global_step": 954, "grad_norm": 2.0355267166690236, "learning_rate": 1.6246529088389865e-06, "loss": 0.4091, "step": 954 }, { "ETA": 4.12, "epoch": 0.3071233317253578, "fp16_scale": 1.0, "global_step": 955, "grad_norm": 2.108593636620605, "learning_rate": 1.6238388791301088e-06, "loss": 0.4331, "step": 955 }, { "ETA": 4.12, "epoch": 0.30744492683711205, "fp16_scale": 1.0, "global_step": 956, "grad_norm": 1.9504079277887791, "learning_rate": 1.6230241720955995e-06, "loss": 0.463, "step": 956 }, { "ETA": 4.12, "epoch": 0.3077665219488664, "fp16_scale": 1.0, "global_step": 957, "grad_norm": 1.9096490823504333, "learning_rate": 1.6222087886200171e-06, "loss": 0.4059, "step": 957 }, { "ETA": 4.12, "epoch": 0.30808811706062067, "fp16_scale": 1.0, "global_step": 958, "grad_norm": 1.8211137389504628, "learning_rate": 1.6213927295886545e-06, "loss": 0.3819, "step": 958 }, { "ETA": 4.12, "epoch": 0.308409712172375, "fp16_scale": 1.0, "global_step": 959, "grad_norm": 1.792072571331639, "learning_rate": 1.6205759958875377e-06, "loss": 0.4385, "step": 959 }, { "ETA": 4.11, "epoch": 0.3087313072841293, "fp16_scale": 1.0, "global_step": 960, "grad_norm": 1.7783900392994196, "learning_rate": 1.6197585884034264e-06, "loss": 0.3853, "step": 960 }, { "ETA": 4.11, "epoch": 0.30905290239588357, "fp16_scale": 1.0, "global_step": 961, "grad_norm": 2.1772217828206615, "learning_rate": 1.61894050802381e-06, "loss": 0.4426, "step": 961 }, { "ETA": 4.11, "epoch": 0.3093744975076379, "fp16_scale": 1.0, "global_step": 962, "grad_norm": 2.29495700071546, "learning_rate": 1.6181217556369102e-06, "loss": 0.4184, "step": 962 }, { "ETA": 4.11, "epoch": 0.3096960926193922, "fp16_scale": 1.0, "global_step": 963, "grad_norm": 2.200297165457558, "learning_rate": 1.6173023321316774e-06, "loss": 0.393, "step": 963 }, { "ETA": 4.1, "epoch": 0.31001768773114646, "fp16_scale": 1.0, "global_step": 964, "grad_norm": 2.3014226603894556, "learning_rate": 1.6164822383977913e-06, "loss": 0.4762, "step": 964 }, { "ETA": 4.1, "epoch": 0.3103392828429008, "fp16_scale": 1.0, "global_step": 965, "grad_norm": 2.043302691249108, "learning_rate": 1.615661475325658e-06, "loss": 0.499, "step": 965 }, { "ETA": 4.1, "epoch": 0.3106608779546551, "fp16_scale": 1.0, "global_step": 966, "grad_norm": 2.0101875742210735, "learning_rate": 1.6148400438064125e-06, "loss": 0.439, "step": 966 }, { "ETA": 4.1, "epoch": 0.3109824730664094, "fp16_scale": 1.0, "global_step": 967, "grad_norm": 2.106040675309942, "learning_rate": 1.6140179447319131e-06, "loss": 0.4656, "step": 967 }, { "ETA": 4.09, "epoch": 0.3113040681781637, "fp16_scale": 1.0, "global_step": 968, "grad_norm": 2.4306980079928797, "learning_rate": 1.6131951789947449e-06, "loss": 0.4361, "step": 968 }, { "ETA": 4.09, "epoch": 0.311625663289918, "fp16_scale": 1.0, "global_step": 969, "grad_norm": 1.9857925796900955, "learning_rate": 1.6123717474882155e-06, "loss": 0.4077, "step": 969 }, { "ETA": 4.09, "epoch": 0.3119472584016723, "fp16_scale": 1.0, "global_step": 970, "grad_norm": 1.9924019384133027, "learning_rate": 1.6115476511063561e-06, "loss": 0.4858, "step": 970 }, { "ETA": 4.09, "epoch": 0.3122688535134266, "fp16_scale": 1.0, "global_step": 971, "grad_norm": 1.764631728406469, "learning_rate": 1.6107228907439195e-06, "loss": 0.4526, "step": 971 }, { "ETA": 4.09, "epoch": 0.3125904486251809, "fp16_scale": 1.0, "global_step": 972, "grad_norm": 1.9406733716313436, "learning_rate": 1.6098974672963794e-06, "loss": 0.4555, "step": 972 }, { "ETA": 4.08, "epoch": 0.3129120437369352, "fp16_scale": 1.0, "global_step": 973, "grad_norm": 1.8258599665084803, "learning_rate": 1.6090713816599293e-06, "loss": 0.4453, "step": 973 }, { "ETA": 4.08, "epoch": 0.3132336388486895, "fp16_scale": 1.0, "global_step": 974, "grad_norm": 2.208412865540917, "learning_rate": 1.608244634731482e-06, "loss": 0.4072, "step": 974 }, { "ETA": 4.08, "epoch": 0.3135552339604438, "fp16_scale": 1.0, "global_step": 975, "grad_norm": 1.9984530555239954, "learning_rate": 1.6074172274086684e-06, "loss": 0.4352, "step": 975 }, { "ETA": 4.08, "epoch": 0.3138768290721981, "fp16_scale": 1.0, "global_step": 976, "grad_norm": 2.258343156213296, "learning_rate": 1.6065891605898357e-06, "loss": 0.4681, "step": 976 }, { "ETA": 4.08, "epoch": 0.3141984241839524, "fp16_scale": 1.0, "global_step": 977, "grad_norm": 1.7754507487479707, "learning_rate": 1.605760435174048e-06, "loss": 0.446, "step": 977 }, { "ETA": 4.07, "epoch": 0.3145200192957067, "fp16_scale": 1.0, "global_step": 978, "grad_norm": 1.942260767564616, "learning_rate": 1.6049310520610834e-06, "loss": 0.4362, "step": 978 }, { "ETA": 4.07, "epoch": 0.314841614407461, "fp16_scale": 1.0, "global_step": 979, "grad_norm": 1.9848142352329132, "learning_rate": 1.6041010121514356e-06, "loss": 0.4035, "step": 979 }, { "ETA": 4.07, "epoch": 0.3151632095192153, "fp16_scale": 1.0, "global_step": 980, "grad_norm": 1.8758060404416177, "learning_rate": 1.6032703163463097e-06, "loss": 0.4265, "step": 980 }, { "ETA": 4.07, "epoch": 0.3154848046309696, "fp16_scale": 1.0, "global_step": 981, "grad_norm": 2.0904424084804605, "learning_rate": 1.6024389655476245e-06, "loss": 0.3721, "step": 981 }, { "ETA": 4.07, "epoch": 0.3158063997427239, "fp16_scale": 1.0, "global_step": 982, "grad_norm": 2.0345875745531536, "learning_rate": 1.6016069606580087e-06, "loss": 0.4663, "step": 982 }, { "ETA": 4.07, "epoch": 0.31612799485447823, "fp16_scale": 1.0, "global_step": 983, "grad_norm": 1.9393973294357287, "learning_rate": 1.600774302580802e-06, "loss": 0.4289, "step": 983 }, { "ETA": 4.06, "epoch": 0.3164495899662325, "fp16_scale": 1.0, "global_step": 984, "grad_norm": 2.183008095835992, "learning_rate": 1.599940992220053e-06, "loss": 0.4253, "step": 984 }, { "ETA": 4.06, "epoch": 0.3167711850779868, "fp16_scale": 1.0, "global_step": 985, "grad_norm": 1.8314046538913937, "learning_rate": 1.5991070304805181e-06, "loss": 0.5015, "step": 985 }, { "ETA": 4.06, "epoch": 0.31709278018974113, "fp16_scale": 1.0, "global_step": 986, "grad_norm": 1.7814409496791646, "learning_rate": 1.598272418267662e-06, "loss": 0.4227, "step": 986 }, { "ETA": 4.06, "epoch": 0.3174143753014954, "fp16_scale": 1.0, "global_step": 987, "grad_norm": 1.98942113818272, "learning_rate": 1.597437156487654e-06, "loss": 0.4697, "step": 987 }, { "ETA": 4.06, "epoch": 0.3177359704132497, "fp16_scale": 1.0, "global_step": 988, "grad_norm": 2.2036326117118463, "learning_rate": 1.5966012460473705e-06, "loss": 0.4297, "step": 988 }, { "ETA": 4.05, "epoch": 0.318057565525004, "fp16_scale": 1.0, "global_step": 989, "grad_norm": 2.0946812395760563, "learning_rate": 1.5957646878543913e-06, "loss": 0.4558, "step": 989 }, { "ETA": 4.05, "epoch": 0.3183791606367583, "fp16_scale": 1.0, "global_step": 990, "grad_norm": 1.9755285676762397, "learning_rate": 1.594927482816999e-06, "loss": 0.4587, "step": 990 }, { "ETA": 4.05, "epoch": 0.31870075574851264, "fp16_scale": 1.0, "global_step": 991, "grad_norm": 1.7781947949105377, "learning_rate": 1.594089631844179e-06, "loss": 0.4095, "step": 991 }, { "ETA": 4.05, "epoch": 0.3190223508602669, "fp16_scale": 1.0, "global_step": 992, "grad_norm": 1.8978851692947698, "learning_rate": 1.5932511358456183e-06, "loss": 0.3714, "step": 992 }, { "ETA": 4.04, "epoch": 0.3193439459720212, "fp16_scale": 1.0, "global_step": 993, "grad_norm": 2.015619886492585, "learning_rate": 1.5924119957317039e-06, "loss": 0.4191, "step": 993 }, { "ETA": 4.04, "epoch": 0.31966554108377554, "fp16_scale": 1.0, "global_step": 994, "grad_norm": 1.8226320334133386, "learning_rate": 1.5915722124135225e-06, "loss": 0.4725, "step": 994 }, { "ETA": 4.04, "epoch": 0.3199871361955298, "fp16_scale": 1.0, "global_step": 995, "grad_norm": 2.1068079375079214, "learning_rate": 1.590731786802858e-06, "loss": 0.399, "step": 995 }, { "ETA": 4.04, "epoch": 0.32030873130728416, "fp16_scale": 1.0, "global_step": 996, "grad_norm": 2.105917564042965, "learning_rate": 1.589890719812193e-06, "loss": 0.3695, "step": 996 }, { "ETA": 4.04, "epoch": 0.32063032641903844, "fp16_scale": 1.0, "global_step": 997, "grad_norm": 1.9618509350466506, "learning_rate": 1.589049012354706e-06, "loss": 0.4866, "step": 997 }, { "ETA": 4.03, "epoch": 0.3209519215307927, "fp16_scale": 1.0, "global_step": 998, "grad_norm": 1.9767032515310012, "learning_rate": 1.5882066653442706e-06, "loss": 0.4888, "step": 998 }, { "ETA": 4.03, "epoch": 0.32127351664254705, "fp16_scale": 1.0, "global_step": 999, "grad_norm": 1.9047943312129536, "learning_rate": 1.5873636796954554e-06, "loss": 0.4402, "step": 999 }, { "ETA": 4.03, "epoch": 0.32159511175430133, "fp16_scale": 1.0, "global_step": 1000, "grad_norm": 1.8852021138237236, "learning_rate": 1.586520056323522e-06, "loss": 0.4348, "step": 1000 }, { "ETA": 4.05, "epoch": 0.3219167068660556, "fp16_scale": 1.0, "global_step": 1001, "grad_norm": 1.9352276953564838, "learning_rate": 1.585675796144424e-06, "loss": 0.4532, "step": 1001 }, { "ETA": 4.04, "epoch": 0.32223830197780995, "fp16_scale": 1.0, "global_step": 1002, "grad_norm": 1.8620206832959252, "learning_rate": 1.5848309000748073e-06, "loss": 0.4039, "step": 1002 }, { "ETA": 4.04, "epoch": 0.32255989708956423, "fp16_scale": 1.0, "global_step": 1003, "grad_norm": 1.8484776743122808, "learning_rate": 1.5839853690320072e-06, "loss": 0.4167, "step": 1003 }, { "ETA": 4.04, "epoch": 0.32288149220131857, "fp16_scale": 1.0, "global_step": 1004, "grad_norm": 1.8194759672775325, "learning_rate": 1.5831392039340496e-06, "loss": 0.4378, "step": 1004 }, { "ETA": 4.04, "epoch": 0.32320308731307285, "fp16_scale": 1.0, "global_step": 1005, "grad_norm": 2.193159959257116, "learning_rate": 1.582292405699648e-06, "loss": 0.4444, "step": 1005 }, { "ETA": 4.04, "epoch": 0.3235246824248271, "fp16_scale": 1.0, "global_step": 1006, "grad_norm": 2.0237663042766583, "learning_rate": 1.5814449752482029e-06, "loss": 0.4066, "step": 1006 }, { "ETA": 4.03, "epoch": 0.32384627753658146, "fp16_scale": 1.0, "global_step": 1007, "grad_norm": 1.9393019775060167, "learning_rate": 1.5805969134998027e-06, "loss": 0.5277, "step": 1007 }, { "ETA": 4.03, "epoch": 0.32416787264833574, "fp16_scale": 1.0, "global_step": 1008, "grad_norm": 1.909534410238616, "learning_rate": 1.5797482213752197e-06, "loss": 0.3949, "step": 1008 }, { "ETA": 4.03, "epoch": 0.32448946776009, "fp16_scale": 1.0, "global_step": 1009, "grad_norm": 1.8643349654376014, "learning_rate": 1.5788988997959115e-06, "loss": 0.3841, "step": 1009 }, { "ETA": 4.02, "epoch": 0.32481106287184436, "fp16_scale": 1.0, "global_step": 1010, "grad_norm": 2.1193315527683856, "learning_rate": 1.5780489496840189e-06, "loss": 0.3753, "step": 1010 }, { "ETA": 4.02, "epoch": 0.32513265798359864, "fp16_scale": 1.0, "global_step": 1011, "grad_norm": 2.0664476418474615, "learning_rate": 1.577198371962365e-06, "loss": 0.4472, "step": 1011 }, { "ETA": 4.02, "epoch": 0.325454253095353, "fp16_scale": 1.0, "global_step": 1012, "grad_norm": 2.0255892118055914, "learning_rate": 1.5763471675544546e-06, "loss": 0.393, "step": 1012 }, { "ETA": 4.01, "epoch": 0.32577584820710725, "fp16_scale": 1.0, "global_step": 1013, "grad_norm": 2.3105379871365797, "learning_rate": 1.5754953373844728e-06, "loss": 0.3603, "step": 1013 }, { "ETA": 4.01, "epoch": 0.32609744331886154, "fp16_scale": 1.0, "global_step": 1014, "grad_norm": 2.155938610942582, "learning_rate": 1.5746428823772836e-06, "loss": 0.4213, "step": 1014 }, { "ETA": 4.01, "epoch": 0.32641903843061587, "fp16_scale": 1.0, "global_step": 1015, "grad_norm": 2.244868296413816, "learning_rate": 1.5737898034584306e-06, "loss": 0.4233, "step": 1015 }, { "ETA": 4.01, "epoch": 0.32674063354237015, "fp16_scale": 1.0, "global_step": 1016, "grad_norm": 2.238154376167949, "learning_rate": 1.5729361015541332e-06, "loss": 0.4664, "step": 1016 }, { "ETA": 4.01, "epoch": 0.32706222865412443, "fp16_scale": 1.0, "global_step": 1017, "grad_norm": 2.4905645200147974, "learning_rate": 1.5720817775912885e-06, "loss": 0.4933, "step": 1017 }, { "ETA": 4.01, "epoch": 0.32738382376587877, "fp16_scale": 1.0, "global_step": 1018, "grad_norm": 2.2008658266116137, "learning_rate": 1.5712268324974688e-06, "loss": 0.4987, "step": 1018 }, { "ETA": 4.0, "epoch": 0.32770541887763305, "fp16_scale": 1.0, "global_step": 1019, "grad_norm": 2.064184305504791, "learning_rate": 1.5703712672009205e-06, "loss": 0.4306, "step": 1019 }, { "ETA": 4.0, "epoch": 0.3280270139893874, "fp16_scale": 1.0, "global_step": 1020, "grad_norm": 1.8987392916269317, "learning_rate": 1.5695150826305631e-06, "loss": 0.4248, "step": 1020 }, { "ETA": 4.0, "epoch": 0.32834860910114166, "fp16_scale": 1.0, "global_step": 1021, "grad_norm": 1.9675675056417445, "learning_rate": 1.568658279715989e-06, "loss": 0.5002, "step": 1021 }, { "ETA": 4.0, "epoch": 0.32867020421289594, "fp16_scale": 1.0, "global_step": 1022, "grad_norm": 2.0582941033337847, "learning_rate": 1.5678008593874622e-06, "loss": 0.4571, "step": 1022 }, { "ETA": 4.0, "epoch": 0.3289917993246503, "fp16_scale": 1.0, "global_step": 1023, "grad_norm": 2.0164501840616236, "learning_rate": 1.5669428225759158e-06, "loss": 0.5116, "step": 1023 }, { "ETA": 4.0, "epoch": 0.32931339443640456, "fp16_scale": 1.0, "global_step": 1024, "grad_norm": 2.1105696147832704, "learning_rate": 1.566084170212953e-06, "loss": 0.497, "step": 1024 }, { "ETA": 3.99, "epoch": 0.32963498954815884, "fp16_scale": 1.0, "global_step": 1025, "grad_norm": 1.829267150650572, "learning_rate": 1.5652249032308462e-06, "loss": 0.4634, "step": 1025 }, { "ETA": 3.99, "epoch": 0.3299565846599132, "fp16_scale": 1.0, "global_step": 1026, "grad_norm": 2.3222584826810238, "learning_rate": 1.5643650225625338e-06, "loss": 0.4429, "step": 1026 }, { "ETA": 3.99, "epoch": 0.33027817977166746, "fp16_scale": 1.0, "global_step": 1027, "grad_norm": 2.0208817981696914, "learning_rate": 1.563504529141621e-06, "loss": 0.4529, "step": 1027 }, { "ETA": 3.99, "epoch": 0.3305997748834218, "fp16_scale": 1.0, "global_step": 1028, "grad_norm": 2.224391812227433, "learning_rate": 1.5626434239023782e-06, "loss": 0.4128, "step": 1028 }, { "ETA": 3.98, "epoch": 0.3309213699951761, "fp16_scale": 1.0, "global_step": 1029, "grad_norm": 1.8325687051324988, "learning_rate": 1.5617817077797405e-06, "loss": 0.4281, "step": 1029 }, { "ETA": 3.98, "epoch": 0.33124296510693035, "fp16_scale": 1.0, "global_step": 1030, "grad_norm": 1.9386730764049669, "learning_rate": 1.5609193817093057e-06, "loss": 0.4135, "step": 1030 }, { "ETA": 3.98, "epoch": 0.3315645602186847, "fp16_scale": 1.0, "global_step": 1031, "grad_norm": 2.1193501727587174, "learning_rate": 1.560056446627334e-06, "loss": 0.3975, "step": 1031 }, { "ETA": 3.98, "epoch": 0.33188615533043897, "fp16_scale": 1.0, "global_step": 1032, "grad_norm": 2.0763726603941297, "learning_rate": 1.5591929034707466e-06, "loss": 0.4187, "step": 1032 }, { "ETA": 3.97, "epoch": 0.3322077504421933, "fp16_scale": 1.0, "global_step": 1033, "grad_norm": 2.0414959466932885, "learning_rate": 1.558328753177126e-06, "loss": 0.5019, "step": 1033 }, { "ETA": 3.97, "epoch": 0.3325293455539476, "fp16_scale": 1.0, "global_step": 1034, "grad_norm": 1.8954899661587834, "learning_rate": 1.5574639966847126e-06, "loss": 0.473, "step": 1034 }, { "ETA": 3.97, "epoch": 0.33285094066570187, "fp16_scale": 1.0, "global_step": 1035, "grad_norm": 1.9437209965979045, "learning_rate": 1.5565986349324054e-06, "loss": 0.4092, "step": 1035 }, { "ETA": 3.97, "epoch": 0.3331725357774562, "fp16_scale": 1.0, "global_step": 1036, "grad_norm": 1.94681689816743, "learning_rate": 1.5557326688597608e-06, "loss": 0.3887, "step": 1036 }, { "ETA": 3.97, "epoch": 0.3334941308892105, "fp16_scale": 1.0, "global_step": 1037, "grad_norm": 1.9569567697320496, "learning_rate": 1.5548660994069907e-06, "loss": 0.401, "step": 1037 }, { "ETA": 3.96, "epoch": 0.33381572600096476, "fp16_scale": 1.0, "global_step": 1038, "grad_norm": 2.205854240770515, "learning_rate": 1.5539989275149629e-06, "loss": 0.4019, "step": 1038 }, { "ETA": 3.96, "epoch": 0.3341373211127191, "fp16_scale": 1.0, "global_step": 1039, "grad_norm": 2.3043794541422664, "learning_rate": 1.5531311541251992e-06, "loss": 0.3857, "step": 1039 }, { "ETA": 3.96, "epoch": 0.3344589162244734, "fp16_scale": 1.0, "global_step": 1040, "grad_norm": 1.7645749873807048, "learning_rate": 1.5522627801798743e-06, "loss": 0.4517, "step": 1040 }, { "ETA": 3.96, "epoch": 0.3347805113362277, "fp16_scale": 1.0, "global_step": 1041, "grad_norm": 2.0863750253291182, "learning_rate": 1.5513938066218142e-06, "loss": 0.487, "step": 1041 }, { "ETA": 3.95, "epoch": 0.335102106447982, "fp16_scale": 1.0, "global_step": 1042, "grad_norm": 1.8860674534398028, "learning_rate": 1.550524234394497e-06, "loss": 0.4165, "step": 1042 }, { "ETA": 3.95, "epoch": 0.3354237015597363, "fp16_scale": 1.0, "global_step": 1043, "grad_norm": 2.0024574398746555, "learning_rate": 1.5496540644420502e-06, "loss": 0.4953, "step": 1043 }, { "ETA": 3.95, "epoch": 0.3357452966714906, "fp16_scale": 1.0, "global_step": 1044, "grad_norm": 1.803225260799288, "learning_rate": 1.5487832977092507e-06, "loss": 0.4748, "step": 1044 }, { "ETA": 3.95, "epoch": 0.3360668917832449, "fp16_scale": 1.0, "global_step": 1045, "grad_norm": 2.0544413463818794, "learning_rate": 1.547911935141523e-06, "loss": 0.3531, "step": 1045 }, { "ETA": 3.95, "epoch": 0.3363884868949992, "fp16_scale": 1.0, "global_step": 1046, "grad_norm": 1.880475734878519, "learning_rate": 1.5470399776849386e-06, "loss": 0.4196, "step": 1046 }, { "ETA": 3.94, "epoch": 0.3367100820067535, "fp16_scale": 1.0, "global_step": 1047, "grad_norm": 1.8844368312463697, "learning_rate": 1.5461674262862147e-06, "loss": 0.4545, "step": 1047 }, { "ETA": 3.94, "epoch": 0.3370316771185078, "fp16_scale": 1.0, "global_step": 1048, "grad_norm": 1.950863553954552, "learning_rate": 1.5452942818927142e-06, "loss": 0.4526, "step": 1048 }, { "ETA": 3.94, "epoch": 0.3373532722302621, "fp16_scale": 1.0, "global_step": 1049, "grad_norm": 2.0667239701990514, "learning_rate": 1.5444205454524427e-06, "loss": 0.3974, "step": 1049 }, { "ETA": 3.94, "epoch": 0.3376748673420164, "fp16_scale": 1.0, "global_step": 1050, "grad_norm": 1.7967936383477858, "learning_rate": 1.543546217914049e-06, "loss": 0.4819, "step": 1050 }, { "ETA": 3.94, "epoch": 0.3379964624537707, "fp16_scale": 1.0, "global_step": 1051, "grad_norm": 1.8505390025145074, "learning_rate": 1.5426713002268246e-06, "loss": 0.4628, "step": 1051 }, { "ETA": 3.93, "epoch": 0.338318057565525, "fp16_scale": 1.0, "global_step": 1052, "grad_norm": 2.6895805870985363, "learning_rate": 1.5417957933407005e-06, "loss": 0.3935, "step": 1052 }, { "ETA": 3.93, "epoch": 0.3386396526772793, "fp16_scale": 1.0, "global_step": 1053, "grad_norm": 2.0481066115640547, "learning_rate": 1.5409196982062475e-06, "loss": 0.3689, "step": 1053 }, { "ETA": 3.93, "epoch": 0.3389612477890336, "fp16_scale": 1.0, "global_step": 1054, "grad_norm": 2.0247938745469463, "learning_rate": 1.5400430157746755e-06, "loss": 0.3837, "step": 1054 }, { "ETA": 3.92, "epoch": 0.3392828429007879, "fp16_scale": 1.0, "global_step": 1055, "grad_norm": 2.1157710086241814, "learning_rate": 1.539165746997833e-06, "loss": 0.4054, "step": 1055 }, { "ETA": 3.92, "epoch": 0.3396044380125422, "fp16_scale": 1.0, "global_step": 1056, "grad_norm": 2.040310105981162, "learning_rate": 1.5382878928282028e-06, "loss": 0.4058, "step": 1056 }, { "ETA": 3.92, "epoch": 0.33992603312429653, "fp16_scale": 1.0, "global_step": 1057, "grad_norm": 1.9898876908794625, "learning_rate": 1.5374094542189054e-06, "loss": 0.4029, "step": 1057 }, { "ETA": 3.92, "epoch": 0.3402476282360508, "fp16_scale": 1.0, "global_step": 1058, "grad_norm": 1.864298916622204, "learning_rate": 1.536530432123695e-06, "loss": 0.491, "step": 1058 }, { "ETA": 3.92, "epoch": 0.3405692233478051, "fp16_scale": 1.0, "global_step": 1059, "grad_norm": 2.0137095046053894, "learning_rate": 1.5356508274969593e-06, "loss": 0.4754, "step": 1059 }, { "ETA": 3.91, "epoch": 0.34089081845955943, "fp16_scale": 1.0, "global_step": 1060, "grad_norm": 1.9153282169539023, "learning_rate": 1.5347706412937184e-06, "loss": 0.4654, "step": 1060 }, { "ETA": 3.91, "epoch": 0.3412124135713137, "fp16_scale": 1.0, "global_step": 1061, "grad_norm": 1.7275404045564595, "learning_rate": 1.533889874469624e-06, "loss": 0.3665, "step": 1061 }, { "ETA": 3.91, "epoch": 0.341534008683068, "fp16_scale": 1.0, "global_step": 1062, "grad_norm": 2.1859526858656197, "learning_rate": 1.533008527980958e-06, "loss": 0.4385, "step": 1062 }, { "ETA": 3.91, "epoch": 0.3418556037948223, "fp16_scale": 1.0, "global_step": 1063, "grad_norm": 2.1233723143512244, "learning_rate": 1.5321266027846327e-06, "loss": 0.4719, "step": 1063 }, { "ETA": 3.91, "epoch": 0.3421771989065766, "fp16_scale": 1.0, "global_step": 1064, "grad_norm": 2.0800026111171586, "learning_rate": 1.531244099838187e-06, "loss": 0.4873, "step": 1064 }, { "ETA": 3.91, "epoch": 0.34249879401833094, "fp16_scale": 1.0, "global_step": 1065, "grad_norm": 2.168644537014544, "learning_rate": 1.5303610200997882e-06, "loss": 0.472, "step": 1065 }, { "ETA": 3.9, "epoch": 0.3428203891300852, "fp16_scale": 1.0, "global_step": 1066, "grad_norm": 1.9707768684020286, "learning_rate": 1.5294773645282296e-06, "loss": 0.4336, "step": 1066 }, { "ETA": 3.9, "epoch": 0.3431419842418395, "fp16_scale": 1.0, "global_step": 1067, "grad_norm": 1.897045422023728, "learning_rate": 1.52859313408293e-06, "loss": 0.4241, "step": 1067 }, { "ETA": 3.9, "epoch": 0.34346357935359384, "fp16_scale": 1.0, "global_step": 1068, "grad_norm": 2.0755770205202766, "learning_rate": 1.5277083297239318e-06, "loss": 0.3643, "step": 1068 }, { "ETA": 3.9, "epoch": 0.3437851744653481, "fp16_scale": 1.0, "global_step": 1069, "grad_norm": 2.116766495277426, "learning_rate": 1.5268229524119004e-06, "loss": 0.3479, "step": 1069 }, { "ETA": 3.89, "epoch": 0.3441067695771024, "fp16_scale": 1.0, "global_step": 1070, "grad_norm": 2.0118423629926996, "learning_rate": 1.5259370031081248e-06, "loss": 0.3794, "step": 1070 }, { "ETA": 3.89, "epoch": 0.34442836468885674, "fp16_scale": 1.0, "global_step": 1071, "grad_norm": 2.300985748426234, "learning_rate": 1.5250504827745127e-06, "loss": 0.4406, "step": 1071 }, { "ETA": 3.89, "epoch": 0.344749959800611, "fp16_scale": 1.0, "global_step": 1072, "grad_norm": 2.0931326714632, "learning_rate": 1.5241633923735938e-06, "loss": 0.4, "step": 1072 }, { "ETA": 3.88, "epoch": 0.34507155491236535, "fp16_scale": 1.0, "global_step": 1073, "grad_norm": 1.8854353625333264, "learning_rate": 1.5232757328685151e-06, "loss": 0.3722, "step": 1073 }, { "ETA": 3.88, "epoch": 0.34539315002411963, "fp16_scale": 1.0, "global_step": 1074, "grad_norm": 1.8720316814062665, "learning_rate": 1.5223875052230436e-06, "loss": 0.4518, "step": 1074 }, { "ETA": 3.88, "epoch": 0.3457147451358739, "fp16_scale": 1.0, "global_step": 1075, "grad_norm": 2.2590433808420953, "learning_rate": 1.521498710401561e-06, "loss": 0.4969, "step": 1075 }, { "ETA": 3.88, "epoch": 0.34603634024762825, "fp16_scale": 1.0, "global_step": 1076, "grad_norm": 1.9492131114052138, "learning_rate": 1.5206093493690652e-06, "loss": 0.4088, "step": 1076 }, { "ETA": 3.88, "epoch": 0.34635793535938253, "fp16_scale": 1.0, "global_step": 1077, "grad_norm": 1.7446844440137887, "learning_rate": 1.5197194230911705e-06, "loss": 0.4018, "step": 1077 }, { "ETA": 3.88, "epoch": 0.34667953047113687, "fp16_scale": 1.0, "global_step": 1078, "grad_norm": 2.1056058348615556, "learning_rate": 1.5188289325341033e-06, "loss": 0.4783, "step": 1078 }, { "ETA": 3.87, "epoch": 0.34700112558289115, "fp16_scale": 1.0, "global_step": 1079, "grad_norm": 2.154899487781416, "learning_rate": 1.5179378786647026e-06, "loss": 0.4482, "step": 1079 }, { "ETA": 3.87, "epoch": 0.3473227206946454, "fp16_scale": 1.0, "global_step": 1080, "grad_norm": 1.8829126410069628, "learning_rate": 1.5170462624504203e-06, "loss": 0.4027, "step": 1080 }, { "ETA": 3.87, "epoch": 0.34764431580639976, "fp16_scale": 1.0, "global_step": 1081, "grad_norm": 1.9589407377100947, "learning_rate": 1.516154084859318e-06, "loss": 0.4548, "step": 1081 }, { "ETA": 3.87, "epoch": 0.34796591091815404, "fp16_scale": 1.0, "global_step": 1082, "grad_norm": 2.2242892428624144, "learning_rate": 1.5152613468600661e-06, "loss": 0.4382, "step": 1082 }, { "ETA": 3.87, "epoch": 0.3482875060299083, "fp16_scale": 1.0, "global_step": 1083, "grad_norm": 1.8970223057811344, "learning_rate": 1.5143680494219453e-06, "loss": 0.422, "step": 1083 }, { "ETA": 3.87, "epoch": 0.34860910114166266, "fp16_scale": 1.0, "global_step": 1084, "grad_norm": 1.7959123082952548, "learning_rate": 1.5134741935148417e-06, "loss": 0.447, "step": 1084 }, { "ETA": 3.86, "epoch": 0.34893069625341694, "fp16_scale": 1.0, "global_step": 1085, "grad_norm": 2.1068985939099196, "learning_rate": 1.5125797801092497e-06, "loss": 0.4406, "step": 1085 }, { "ETA": 3.86, "epoch": 0.3492522913651713, "fp16_scale": 1.0, "global_step": 1086, "grad_norm": 2.0786396617899117, "learning_rate": 1.5116848101762672e-06, "loss": 0.4329, "step": 1086 }, { "ETA": 3.86, "epoch": 0.34957388647692555, "fp16_scale": 1.0, "global_step": 1087, "grad_norm": 1.908634889904505, "learning_rate": 1.5107892846875973e-06, "loss": 0.4254, "step": 1087 }, { "ETA": 3.86, "epoch": 0.34989548158867984, "fp16_scale": 1.0, "global_step": 1088, "grad_norm": 2.223620445451567, "learning_rate": 1.5098932046155463e-06, "loss": 0.4824, "step": 1088 }, { "ETA": 3.86, "epoch": 0.35021707670043417, "fp16_scale": 1.0, "global_step": 1089, "grad_norm": 2.061418506233444, "learning_rate": 1.5089965709330226e-06, "loss": 0.4609, "step": 1089 }, { "ETA": 3.85, "epoch": 0.35053867181218845, "fp16_scale": 1.0, "global_step": 1090, "grad_norm": 2.0055627953071156, "learning_rate": 1.5080993846135349e-06, "loss": 0.4284, "step": 1090 }, { "ETA": 3.85, "epoch": 0.35086026692394273, "fp16_scale": 1.0, "global_step": 1091, "grad_norm": 1.8718837955396956, "learning_rate": 1.5072016466311933e-06, "loss": 0.4143, "step": 1091 }, { "ETA": 3.85, "epoch": 0.35118186203569707, "fp16_scale": 1.0, "global_step": 1092, "grad_norm": 2.0265850308325635, "learning_rate": 1.506303357960706e-06, "loss": 0.4691, "step": 1092 }, { "ETA": 3.85, "epoch": 0.35150345714745135, "fp16_scale": 1.0, "global_step": 1093, "grad_norm": 1.797624163727888, "learning_rate": 1.5054045195773787e-06, "loss": 0.3936, "step": 1093 }, { "ETA": 3.85, "epoch": 0.3518250522592057, "fp16_scale": 1.0, "global_step": 1094, "grad_norm": 1.9244718608465112, "learning_rate": 1.504505132457115e-06, "loss": 0.44, "step": 1094 }, { "ETA": 3.85, "epoch": 0.35214664737095996, "fp16_scale": 1.0, "global_step": 1095, "grad_norm": 1.8827263655898996, "learning_rate": 1.5036051975764133e-06, "loss": 0.497, "step": 1095 }, { "ETA": 3.84, "epoch": 0.35246824248271424, "fp16_scale": 1.0, "global_step": 1096, "grad_norm": 2.06998781133535, "learning_rate": 1.5027047159123676e-06, "loss": 0.4381, "step": 1096 }, { "ETA": 3.84, "epoch": 0.3527898375944686, "fp16_scale": 1.0, "global_step": 1097, "grad_norm": 1.9529943154306728, "learning_rate": 1.5018036884426651e-06, "loss": 0.4654, "step": 1097 }, { "ETA": 3.84, "epoch": 0.35311143270622286, "fp16_scale": 1.0, "global_step": 1098, "grad_norm": 2.102138183876875, "learning_rate": 1.500902116145585e-06, "loss": 0.3862, "step": 1098 }, { "ETA": 3.84, "epoch": 0.35343302781797714, "fp16_scale": 1.0, "global_step": 1099, "grad_norm": 2.1366269842497507, "learning_rate": 1.5e-06, "loss": 0.3634, "step": 1099 }, { "ETA": 3.83, "epoch": 0.3537546229297315, "fp16_scale": 1.0, "global_step": 1100, "grad_norm": 1.9739836451666781, "learning_rate": 1.4990973409853709e-06, "loss": 0.385, "step": 1100 }, { "ETA": 3.83, "epoch": 0.35407621804148576, "fp16_scale": 1.0, "global_step": 1101, "grad_norm": 2.0774369345981905, "learning_rate": 1.4981941400817489e-06, "loss": 0.4901, "step": 1101 }, { "ETA": 3.83, "epoch": 0.3543978131532401, "fp16_scale": 1.0, "global_step": 1102, "grad_norm": 1.991955578669468, "learning_rate": 1.4972903982697742e-06, "loss": 0.483, "step": 1102 }, { "ETA": 3.83, "epoch": 0.3547194082649944, "fp16_scale": 1.0, "global_step": 1103, "grad_norm": 1.9843357649460551, "learning_rate": 1.4963861165306736e-06, "loss": 0.4622, "step": 1103 }, { "ETA": 3.83, "epoch": 0.35504100337674865, "fp16_scale": 1.0, "global_step": 1104, "grad_norm": 1.7678654457273422, "learning_rate": 1.4954812958462597e-06, "loss": 0.3585, "step": 1104 }, { "ETA": 3.82, "epoch": 0.355362598488503, "fp16_scale": 1.0, "global_step": 1105, "grad_norm": 1.894926407127137, "learning_rate": 1.4945759371989315e-06, "loss": 0.3916, "step": 1105 }, { "ETA": 3.82, "epoch": 0.35568419360025727, "fp16_scale": 1.0, "global_step": 1106, "grad_norm": 1.8613555067449779, "learning_rate": 1.4936700415716708e-06, "loss": 0.4828, "step": 1106 }, { "ETA": 3.82, "epoch": 0.35600578871201155, "fp16_scale": 1.0, "global_step": 1107, "grad_norm": 2.1396721092458852, "learning_rate": 1.4927636099480433e-06, "loss": 0.5529, "step": 1107 }, { "ETA": 3.82, "epoch": 0.3563273838237659, "fp16_scale": 1.0, "global_step": 1108, "grad_norm": 2.0522833163242264, "learning_rate": 1.4918566433121962e-06, "loss": 0.3821, "step": 1108 }, { "ETA": 3.81, "epoch": 0.35664897893552017, "fp16_scale": 1.0, "global_step": 1109, "grad_norm": 1.9413873200967102, "learning_rate": 1.4909491426488577e-06, "loss": 0.4043, "step": 1109 }, { "ETA": 3.81, "epoch": 0.3569705740472745, "fp16_scale": 1.0, "global_step": 1110, "grad_norm": 1.8795456509662947, "learning_rate": 1.4900411089433363e-06, "loss": 0.4493, "step": 1110 }, { "ETA": 3.81, "epoch": 0.3572921691590288, "fp16_scale": 1.0, "global_step": 1111, "grad_norm": 2.082321891294859, "learning_rate": 1.489132543181518e-06, "loss": 0.4771, "step": 1111 }, { "ETA": 3.81, "epoch": 0.35761376427078306, "fp16_scale": 1.0, "global_step": 1112, "grad_norm": 2.224130407556845, "learning_rate": 1.4882234463498677e-06, "loss": 0.4975, "step": 1112 }, { "ETA": 3.81, "epoch": 0.3579353593825374, "fp16_scale": 1.0, "global_step": 1113, "grad_norm": 1.8071191267023712, "learning_rate": 1.4873138194354266e-06, "loss": 0.4052, "step": 1113 }, { "ETA": 3.81, "epoch": 0.3582569544942917, "fp16_scale": 1.0, "global_step": 1114, "grad_norm": 2.1122509646678598, "learning_rate": 1.486403663425811e-06, "loss": 0.4457, "step": 1114 }, { "ETA": 3.8, "epoch": 0.358578549606046, "fp16_scale": 1.0, "global_step": 1115, "grad_norm": 1.98561107580325, "learning_rate": 1.485492979309212e-06, "loss": 0.4065, "step": 1115 }, { "ETA": 3.8, "epoch": 0.3589001447178003, "fp16_scale": 1.0, "global_step": 1116, "grad_norm": 1.8581086982767931, "learning_rate": 1.4845817680743941e-06, "loss": 0.4603, "step": 1116 }, { "ETA": 3.8, "epoch": 0.3592217398295546, "fp16_scale": 1.0, "global_step": 1117, "grad_norm": 2.0238917547533957, "learning_rate": 1.4836700307106939e-06, "loss": 0.5412, "step": 1117 }, { "ETA": 3.8, "epoch": 0.3595433349413089, "fp16_scale": 1.0, "global_step": 1118, "grad_norm": 1.9850118939316719, "learning_rate": 1.4827577682080198e-06, "loss": 0.4695, "step": 1118 }, { "ETA": 3.8, "epoch": 0.3598649300530632, "fp16_scale": 1.0, "global_step": 1119, "grad_norm": 2.135523671395713, "learning_rate": 1.4818449815568492e-06, "loss": 0.4167, "step": 1119 }, { "ETA": 3.79, "epoch": 0.3601865251648175, "fp16_scale": 1.0, "global_step": 1120, "grad_norm": 2.0196024687993983, "learning_rate": 1.4809316717482298e-06, "loss": 0.466, "step": 1120 }, { "ETA": 3.79, "epoch": 0.3605081202765718, "fp16_scale": 1.0, "global_step": 1121, "grad_norm": 1.8288815589001348, "learning_rate": 1.4800178397737771e-06, "loss": 0.4073, "step": 1121 }, { "ETA": 3.79, "epoch": 0.3608297153883261, "fp16_scale": 1.0, "global_step": 1122, "grad_norm": 1.9586967489426974, "learning_rate": 1.4791034866256728e-06, "loss": 0.4799, "step": 1122 }, { "ETA": 3.79, "epoch": 0.3611513105000804, "fp16_scale": 1.0, "global_step": 1123, "grad_norm": 1.9743135446445312, "learning_rate": 1.4781886132966652e-06, "loss": 0.4088, "step": 1123 }, { "ETA": 3.79, "epoch": 0.3614729056118347, "fp16_scale": 1.0, "global_step": 1124, "grad_norm": 1.857070167265074, "learning_rate": 1.477273220780067e-06, "loss": 0.4299, "step": 1124 }, { "ETA": 3.79, "epoch": 0.361794500723589, "fp16_scale": 1.0, "global_step": 1125, "grad_norm": 1.86225308423235, "learning_rate": 1.4763573100697548e-06, "loss": 0.3947, "step": 1125 }, { "ETA": 3.78, "epoch": 0.3621160958353433, "fp16_scale": 1.0, "global_step": 1126, "grad_norm": 1.8722597560581546, "learning_rate": 1.4754408821601675e-06, "loss": 0.438, "step": 1126 }, { "ETA": 3.78, "epoch": 0.3624376909470976, "fp16_scale": 1.0, "global_step": 1127, "grad_norm": 1.7661756651665166, "learning_rate": 1.4745239380463067e-06, "loss": 0.3494, "step": 1127 }, { "ETA": 3.78, "epoch": 0.3627592860588519, "fp16_scale": 1.0, "global_step": 1128, "grad_norm": 1.8066850422576595, "learning_rate": 1.4736064787237322e-06, "loss": 0.4816, "step": 1128 }, { "ETA": 3.78, "epoch": 0.3630808811706062, "fp16_scale": 1.0, "global_step": 1129, "grad_norm": 1.8532213379815963, "learning_rate": 1.4726885051885652e-06, "loss": 0.4565, "step": 1129 }, { "ETA": 3.78, "epoch": 0.3634024762823605, "fp16_scale": 1.0, "global_step": 1130, "grad_norm": 1.888133209159448, "learning_rate": 1.4717700184374846e-06, "loss": 0.3954, "step": 1130 }, { "ETA": 3.77, "epoch": 0.36372407139411483, "fp16_scale": 1.0, "global_step": 1131, "grad_norm": 1.958330303482453, "learning_rate": 1.4708510194677266e-06, "loss": 0.3943, "step": 1131 }, { "ETA": 3.77, "epoch": 0.3640456665058691, "fp16_scale": 1.0, "global_step": 1132, "grad_norm": 2.4457901298456095, "learning_rate": 1.4699315092770826e-06, "loss": 0.4209, "step": 1132 }, { "ETA": 3.77, "epoch": 0.3643672616176234, "fp16_scale": 1.0, "global_step": 1133, "grad_norm": 2.086980629506438, "learning_rate": 1.469011488863901e-06, "loss": 0.435, "step": 1133 }, { "ETA": 3.76, "epoch": 0.36468885672937773, "fp16_scale": 1.0, "global_step": 1134, "grad_norm": 1.8818455568595487, "learning_rate": 1.4680909592270818e-06, "loss": 0.4205, "step": 1134 }, { "ETA": 3.76, "epoch": 0.365010451841132, "fp16_scale": 1.0, "global_step": 1135, "grad_norm": 1.799982532353527, "learning_rate": 1.4671699213660802e-06, "loss": 0.4193, "step": 1135 }, { "ETA": 3.76, "epoch": 0.3653320469528863, "fp16_scale": 1.0, "global_step": 1136, "grad_norm": 2.0392226099976845, "learning_rate": 1.4662483762809013e-06, "loss": 0.3781, "step": 1136 }, { "ETA": 3.76, "epoch": 0.3656536420646406, "fp16_scale": 1.0, "global_step": 1137, "grad_norm": 1.8884881243701355, "learning_rate": 1.4653263249721018e-06, "loss": 0.5207, "step": 1137 }, { "ETA": 3.76, "epoch": 0.3659752371763949, "fp16_scale": 1.0, "global_step": 1138, "grad_norm": 2.1148533457798178, "learning_rate": 1.4644037684407881e-06, "loss": 0.4956, "step": 1138 }, { "ETA": 3.76, "epoch": 0.36629683228814924, "fp16_scale": 1.0, "global_step": 1139, "grad_norm": 2.1641516091055233, "learning_rate": 1.4634807076886154e-06, "loss": 0.4612, "step": 1139 }, { "ETA": 3.75, "epoch": 0.3666184273999035, "fp16_scale": 1.0, "global_step": 1140, "grad_norm": 1.838643891924867, "learning_rate": 1.462557143717785e-06, "loss": 0.3633, "step": 1140 }, { "ETA": 3.75, "epoch": 0.3669400225116578, "fp16_scale": 1.0, "global_step": 1141, "grad_norm": 1.9660379929411695, "learning_rate": 1.4616330775310462e-06, "loss": 0.3676, "step": 1141 }, { "ETA": 3.75, "epoch": 0.36726161762341214, "fp16_scale": 1.0, "global_step": 1142, "grad_norm": 2.1266579922069444, "learning_rate": 1.4607085101316922e-06, "loss": 0.4574, "step": 1142 }, { "ETA": 3.75, "epoch": 0.3675832127351664, "fp16_scale": 1.0, "global_step": 1143, "grad_norm": 2.270188265598726, "learning_rate": 1.4597834425235617e-06, "loss": 0.5031, "step": 1143 }, { "ETA": 3.74, "epoch": 0.3679048078469207, "fp16_scale": 1.0, "global_step": 1144, "grad_norm": 1.8471350459801226, "learning_rate": 1.4588578757110358e-06, "loss": 0.4623, "step": 1144 }, { "ETA": 3.74, "epoch": 0.36822640295867504, "fp16_scale": 1.0, "global_step": 1145, "grad_norm": 1.9801295558849021, "learning_rate": 1.457931810699037e-06, "loss": 0.3618, "step": 1145 }, { "ETA": 3.74, "epoch": 0.3685479980704293, "fp16_scale": 1.0, "global_step": 1146, "grad_norm": 1.8706238235658559, "learning_rate": 1.4570052484930299e-06, "loss": 0.4039, "step": 1146 }, { "ETA": 3.74, "epoch": 0.36886959318218365, "fp16_scale": 1.0, "global_step": 1147, "grad_norm": 1.9248669172574233, "learning_rate": 1.4560781900990184e-06, "loss": 0.4597, "step": 1147 }, { "ETA": 3.74, "epoch": 0.36919118829393793, "fp16_scale": 1.0, "global_step": 1148, "grad_norm": 1.952201904699033, "learning_rate": 1.4551506365235446e-06, "loss": 0.4923, "step": 1148 }, { "ETA": 3.73, "epoch": 0.3695127834056922, "fp16_scale": 1.0, "global_step": 1149, "grad_norm": 1.972166684174856, "learning_rate": 1.4542225887736894e-06, "loss": 0.4514, "step": 1149 }, { "ETA": 3.73, "epoch": 0.36983437851744655, "fp16_scale": 1.0, "global_step": 1150, "grad_norm": 1.898042187195432, "learning_rate": 1.4532940478570693e-06, "loss": 0.4115, "step": 1150 }, { "ETA": 3.73, "epoch": 0.37015597362920083, "fp16_scale": 1.0, "global_step": 1151, "grad_norm": 2.075689020313724, "learning_rate": 1.4523650147818362e-06, "loss": 0.3744, "step": 1151 }, { "ETA": 3.72, "epoch": 0.3704775687409551, "fp16_scale": 1.0, "global_step": 1152, "grad_norm": 1.9696699565067173, "learning_rate": 1.4514354905566774e-06, "loss": 0.4121, "step": 1152 }, { "ETA": 3.72, "epoch": 0.37079916385270945, "fp16_scale": 1.0, "global_step": 1153, "grad_norm": 1.9542736425506968, "learning_rate": 1.4505054761908123e-06, "loss": 0.398, "step": 1153 }, { "ETA": 3.72, "epoch": 0.3711207589644637, "fp16_scale": 1.0, "global_step": 1154, "grad_norm": 1.7919743813455156, "learning_rate": 1.4495749726939926e-06, "loss": 0.4436, "step": 1154 }, { "ETA": 3.72, "epoch": 0.37144235407621806, "fp16_scale": 1.0, "global_step": 1155, "grad_norm": 2.3410907527300653, "learning_rate": 1.448643981076502e-06, "loss": 0.3645, "step": 1155 }, { "ETA": 3.71, "epoch": 0.37176394918797234, "fp16_scale": 1.0, "global_step": 1156, "grad_norm": 2.0876492805195705, "learning_rate": 1.4477125023491535e-06, "loss": 0.3779, "step": 1156 }, { "ETA": 3.71, "epoch": 0.3720855442997266, "fp16_scale": 1.0, "global_step": 1157, "grad_norm": 1.8698333270432437, "learning_rate": 1.4467805375232888e-06, "loss": 0.4079, "step": 1157 }, { "ETA": 3.71, "epoch": 0.37240713941148096, "fp16_scale": 1.0, "global_step": 1158, "grad_norm": 1.9124687998385432, "learning_rate": 1.4458480876107777e-06, "loss": 0.4337, "step": 1158 }, { "ETA": 3.71, "epoch": 0.37272873452323524, "fp16_scale": 1.0, "global_step": 1159, "grad_norm": 1.8427479575262413, "learning_rate": 1.4449151536240165e-06, "loss": 0.4576, "step": 1159 }, { "ETA": 3.71, "epoch": 0.3730503296349896, "fp16_scale": 1.0, "global_step": 1160, "grad_norm": 1.9713810052722738, "learning_rate": 1.4439817365759272e-06, "loss": 0.457, "step": 1160 }, { "ETA": 3.7, "epoch": 0.37337192474674386, "fp16_scale": 1.0, "global_step": 1161, "grad_norm": 2.164070706224667, "learning_rate": 1.4430478374799564e-06, "loss": 0.339, "step": 1161 }, { "ETA": 3.7, "epoch": 0.37369351985849814, "fp16_scale": 1.0, "global_step": 1162, "grad_norm": 2.145443423694682, "learning_rate": 1.4421134573500736e-06, "loss": 0.3927, "step": 1162 }, { "ETA": 3.7, "epoch": 0.37401511497025247, "fp16_scale": 1.0, "global_step": 1163, "grad_norm": 2.1312953943216737, "learning_rate": 1.4411785972007712e-06, "loss": 0.4088, "step": 1163 }, { "ETA": 3.69, "epoch": 0.37433671008200675, "fp16_scale": 1.0, "global_step": 1164, "grad_norm": 2.228988248090331, "learning_rate": 1.4402432580470622e-06, "loss": 0.4339, "step": 1164 }, { "ETA": 3.69, "epoch": 0.37465830519376103, "fp16_scale": 1.0, "global_step": 1165, "grad_norm": 2.1241862946324286, "learning_rate": 1.43930744090448e-06, "loss": 0.4488, "step": 1165 }, { "ETA": 3.69, "epoch": 0.37497990030551537, "fp16_scale": 1.0, "global_step": 1166, "grad_norm": 2.031680088017968, "learning_rate": 1.4383711467890773e-06, "loss": 0.407, "step": 1166 }, { "ETA": 3.69, "epoch": 0.37530149541726965, "fp16_scale": 1.0, "global_step": 1167, "grad_norm": 2.19404819289449, "learning_rate": 1.437434376717424e-06, "loss": 0.3353, "step": 1167 }, { "ETA": 3.69, "epoch": 0.375623090529024, "fp16_scale": 1.0, "global_step": 1168, "grad_norm": 2.0969147691699974, "learning_rate": 1.436497131706607e-06, "loss": 0.5032, "step": 1168 }, { "ETA": 3.68, "epoch": 0.37594468564077826, "fp16_scale": 1.0, "global_step": 1169, "grad_norm": 1.9700428428502548, "learning_rate": 1.435559412774229e-06, "loss": 0.5975, "step": 1169 }, { "ETA": 3.68, "epoch": 0.37626628075253254, "fp16_scale": 1.0, "global_step": 1170, "grad_norm": 1.9108611126655912, "learning_rate": 1.4346212209384065e-06, "loss": 0.4207, "step": 1170 }, { "ETA": 3.68, "epoch": 0.3765878758642869, "fp16_scale": 1.0, "global_step": 1171, "grad_norm": 2.175370372481936, "learning_rate": 1.4336825572177714e-06, "loss": 0.4746, "step": 1171 }, { "ETA": 3.68, "epoch": 0.37690947097604116, "fp16_scale": 1.0, "global_step": 1172, "grad_norm": 1.9183000041755622, "learning_rate": 1.4327434226314656e-06, "loss": 0.5358, "step": 1172 }, { "ETA": 3.68, "epoch": 0.37723106608779544, "fp16_scale": 1.0, "global_step": 1173, "grad_norm": 1.9645578277864115, "learning_rate": 1.4318038181991439e-06, "loss": 0.4341, "step": 1173 }, { "ETA": 3.68, "epoch": 0.3775526611995498, "fp16_scale": 1.0, "global_step": 1174, "grad_norm": 1.9791378068214516, "learning_rate": 1.4308637449409703e-06, "loss": 0.3746, "step": 1174 }, { "ETA": 3.67, "epoch": 0.37787425631130406, "fp16_scale": 1.0, "global_step": 1175, "grad_norm": 2.29483082945288, "learning_rate": 1.4299232038776183e-06, "loss": 0.4422, "step": 1175 }, { "ETA": 3.67, "epoch": 0.3781958514230584, "fp16_scale": 1.0, "global_step": 1176, "grad_norm": 1.8897784540429947, "learning_rate": 1.4289821960302687e-06, "loss": 0.4731, "step": 1176 }, { "ETA": 3.67, "epoch": 0.3785174465348127, "fp16_scale": 1.0, "global_step": 1177, "grad_norm": 2.0911449597866687, "learning_rate": 1.4280407224206103e-06, "loss": 0.3661, "step": 1177 }, { "ETA": 3.67, "epoch": 0.37883904164656695, "fp16_scale": 1.0, "global_step": 1178, "grad_norm": 2.0359556730170514, "learning_rate": 1.4270987840708366e-06, "loss": 0.3681, "step": 1178 }, { "ETA": 3.66, "epoch": 0.3791606367583213, "fp16_scale": 1.0, "global_step": 1179, "grad_norm": 2.1067301476592153, "learning_rate": 1.4261563820036454e-06, "loss": 0.5051, "step": 1179 }, { "ETA": 3.66, "epoch": 0.37948223187007557, "fp16_scale": 1.0, "global_step": 1180, "grad_norm": 1.841840073585346, "learning_rate": 1.4252135172422395e-06, "loss": 0.4756, "step": 1180 }, { "ETA": 3.66, "epoch": 0.37980382698182985, "fp16_scale": 1.0, "global_step": 1181, "grad_norm": 1.903373744148128, "learning_rate": 1.4242701908103218e-06, "loss": 0.4239, "step": 1181 }, { "ETA": 3.66, "epoch": 0.3801254220935842, "fp16_scale": 1.0, "global_step": 1182, "grad_norm": 1.9609762778755848, "learning_rate": 1.4233264037320992e-06, "loss": 0.4805, "step": 1182 }, { "ETA": 3.66, "epoch": 0.38044701720533847, "fp16_scale": 1.0, "global_step": 1183, "grad_norm": 2.0360191156115097, "learning_rate": 1.422382157032276e-06, "loss": 0.4749, "step": 1183 }, { "ETA": 3.66, "epoch": 0.3807686123170928, "fp16_scale": 1.0, "global_step": 1184, "grad_norm": 2.0375993595943322, "learning_rate": 1.4214374517360575e-06, "loss": 0.4791, "step": 1184 }, { "ETA": 3.65, "epoch": 0.3810902074288471, "fp16_scale": 1.0, "global_step": 1185, "grad_norm": 2.069701150745817, "learning_rate": 1.4204922888691462e-06, "loss": 0.5296, "step": 1185 }, { "ETA": 3.65, "epoch": 0.38141180254060136, "fp16_scale": 1.0, "global_step": 1186, "grad_norm": 1.7739466851664754, "learning_rate": 1.4195466694577414e-06, "loss": 0.3603, "step": 1186 }, { "ETA": 3.65, "epoch": 0.3817333976523557, "fp16_scale": 1.0, "global_step": 1187, "grad_norm": 1.8934344918273596, "learning_rate": 1.4186005945285374e-06, "loss": 0.4093, "step": 1187 }, { "ETA": 3.65, "epoch": 0.38205499276411, "fp16_scale": 1.0, "global_step": 1188, "grad_norm": 2.099195276315028, "learning_rate": 1.4176540651087253e-06, "loss": 0.4622, "step": 1188 }, { "ETA": 3.65, "epoch": 0.38237658787586426, "fp16_scale": 1.0, "global_step": 1189, "grad_norm": 2.0442817261367714, "learning_rate": 1.4167070822259865e-06, "loss": 0.4139, "step": 1189 }, { "ETA": 3.64, "epoch": 0.3826981829876186, "fp16_scale": 1.0, "global_step": 1190, "grad_norm": 1.9239751649252113, "learning_rate": 1.4157596469084973e-06, "loss": 0.4397, "step": 1190 }, { "ETA": 3.64, "epoch": 0.3830197780993729, "fp16_scale": 1.0, "global_step": 1191, "grad_norm": 1.820718900998498, "learning_rate": 1.4148117601849245e-06, "loss": 0.3975, "step": 1191 }, { "ETA": 3.64, "epoch": 0.3833413732111272, "fp16_scale": 1.0, "global_step": 1192, "grad_norm": 1.8203421531298762, "learning_rate": 1.4138634230844239e-06, "loss": 0.4198, "step": 1192 }, { "ETA": 3.64, "epoch": 0.3836629683228815, "fp16_scale": 1.0, "global_step": 1193, "grad_norm": 1.8982203182013695, "learning_rate": 1.4129146366366422e-06, "loss": 0.4429, "step": 1193 }, { "ETA": 3.64, "epoch": 0.3839845634346358, "fp16_scale": 1.0, "global_step": 1194, "grad_norm": 1.7244159154380014, "learning_rate": 1.4119654018717125e-06, "loss": 0.4209, "step": 1194 }, { "ETA": 3.64, "epoch": 0.3843061585463901, "fp16_scale": 1.0, "global_step": 1195, "grad_norm": 1.7945422968823292, "learning_rate": 1.4110157198202547e-06, "loss": 0.4141, "step": 1195 }, { "ETA": 3.63, "epoch": 0.3846277536581444, "fp16_scale": 1.0, "global_step": 1196, "grad_norm": 1.7696561457719395, "learning_rate": 1.410065591513376e-06, "loss": 0.462, "step": 1196 }, { "ETA": 3.63, "epoch": 0.3849493487698987, "fp16_scale": 1.0, "global_step": 1197, "grad_norm": 2.0332531442940955, "learning_rate": 1.409115017982666e-06, "loss": 0.4538, "step": 1197 }, { "ETA": 3.63, "epoch": 0.385270943881653, "fp16_scale": 1.0, "global_step": 1198, "grad_norm": 1.9883517762567522, "learning_rate": 1.4081640002601981e-06, "loss": 0.4626, "step": 1198 }, { "ETA": 3.63, "epoch": 0.3855925389934073, "fp16_scale": 1.0, "global_step": 1199, "grad_norm": 1.9941088081217284, "learning_rate": 1.4072125393785294e-06, "loss": 0.4557, "step": 1199 }, { "ETA": 3.63, "epoch": 0.3859141341051616, "fp16_scale": 1.0, "global_step": 1200, "grad_norm": 1.9058006896509434, "learning_rate": 1.4062606363706971e-06, "loss": 0.4577, "step": 1200 }, { "ETA": 3.64, "epoch": 0.3862357292169159, "fp16_scale": 1.0, "global_step": 1201, "grad_norm": 1.8311918641065574, "learning_rate": 1.4053082922702183e-06, "loss": 0.4237, "step": 1201 }, { "ETA": 3.64, "epoch": 0.3865573243286702, "fp16_scale": 1.0, "global_step": 1202, "grad_norm": 1.9026338376726455, "learning_rate": 1.4043555081110892e-06, "loss": 0.478, "step": 1202 }, { "ETA": 3.64, "epoch": 0.3868789194404245, "fp16_scale": 1.0, "global_step": 1203, "grad_norm": 1.9074712187600207, "learning_rate": 1.4034022849277842e-06, "loss": 0.4715, "step": 1203 }, { "ETA": 3.63, "epoch": 0.3872005145521788, "fp16_scale": 1.0, "global_step": 1204, "grad_norm": 1.889922757335601, "learning_rate": 1.4024486237552537e-06, "loss": 0.4644, "step": 1204 }, { "ETA": 3.63, "epoch": 0.38752210966393313, "fp16_scale": 1.0, "global_step": 1205, "grad_norm": 1.9012843410779694, "learning_rate": 1.4014945256289239e-06, "loss": 0.4764, "step": 1205 }, { "ETA": 3.63, "epoch": 0.3878437047756874, "fp16_scale": 1.0, "global_step": 1206, "grad_norm": 2.094894510992424, "learning_rate": 1.4005399915846955e-06, "loss": 0.4528, "step": 1206 }, { "ETA": 3.63, "epoch": 0.3881652998874417, "fp16_scale": 1.0, "global_step": 1207, "grad_norm": 1.715314005837692, "learning_rate": 1.3995850226589431e-06, "loss": 0.4322, "step": 1207 }, { "ETA": 3.63, "epoch": 0.38848689499919603, "fp16_scale": 1.0, "global_step": 1208, "grad_norm": 2.074525094768092, "learning_rate": 1.3986296198885122e-06, "loss": 0.4331, "step": 1208 }, { "ETA": 3.62, "epoch": 0.3888084901109503, "fp16_scale": 1.0, "global_step": 1209, "grad_norm": 1.6998510772388846, "learning_rate": 1.39767378431072e-06, "loss": 0.3926, "step": 1209 }, { "ETA": 3.62, "epoch": 0.3891300852227046, "fp16_scale": 1.0, "global_step": 1210, "grad_norm": 2.1933516360856986, "learning_rate": 1.3967175169633536e-06, "loss": 0.4228, "step": 1210 }, { "ETA": 3.62, "epoch": 0.3894516803344589, "fp16_scale": 1.0, "global_step": 1211, "grad_norm": 2.043829844085143, "learning_rate": 1.395760818884669e-06, "loss": 0.4713, "step": 1211 }, { "ETA": 3.62, "epoch": 0.3897732754462132, "fp16_scale": 1.0, "global_step": 1212, "grad_norm": 2.0322694459756407, "learning_rate": 1.3948036911133899e-06, "loss": 0.4377, "step": 1212 }, { "ETA": 3.62, "epoch": 0.39009487055796754, "fp16_scale": 1.0, "global_step": 1213, "grad_norm": 2.0356245607499166, "learning_rate": 1.3938461346887061e-06, "loss": 0.4721, "step": 1213 }, { "ETA": 3.62, "epoch": 0.3904164656697218, "fp16_scale": 1.0, "global_step": 1214, "grad_norm": 2.0212017990485, "learning_rate": 1.3928881506502732e-06, "loss": 0.4984, "step": 1214 }, { "ETA": 3.62, "epoch": 0.3907380607814761, "fp16_scale": 1.0, "global_step": 1215, "grad_norm": 1.7592455357833894, "learning_rate": 1.3919297400382108e-06, "loss": 0.4463, "step": 1215 }, { "ETA": 3.61, "epoch": 0.39105965589323044, "fp16_scale": 1.0, "global_step": 1216, "grad_norm": 2.174186834923421, "learning_rate": 1.3909709038931021e-06, "loss": 0.5003, "step": 1216 }, { "ETA": 3.61, "epoch": 0.3913812510049847, "fp16_scale": 1.0, "global_step": 1217, "grad_norm": 2.149807128715763, "learning_rate": 1.3900116432559918e-06, "loss": 0.3546, "step": 1217 }, { "ETA": 3.61, "epoch": 0.391702846116739, "fp16_scale": 1.0, "global_step": 1218, "grad_norm": 1.9846826547687488, "learning_rate": 1.3890519591683858e-06, "loss": 0.4332, "step": 1218 }, { "ETA": 3.61, "epoch": 0.39202444122849334, "fp16_scale": 1.0, "global_step": 1219, "grad_norm": 1.9078545416159736, "learning_rate": 1.3880918526722496e-06, "loss": 0.4545, "step": 1219 }, { "ETA": 3.6, "epoch": 0.3923460363402476, "fp16_scale": 1.0, "global_step": 1220, "grad_norm": 1.960043066110628, "learning_rate": 1.3871313248100076e-06, "loss": 0.3758, "step": 1220 }, { "ETA": 3.6, "epoch": 0.39266763145200195, "fp16_scale": 1.0, "global_step": 1221, "grad_norm": 2.015117131614875, "learning_rate": 1.3861703766245412e-06, "loss": 0.4708, "step": 1221 }, { "ETA": 3.6, "epoch": 0.39298922656375623, "fp16_scale": 1.0, "global_step": 1222, "grad_norm": 1.806741383707497, "learning_rate": 1.3852090091591887e-06, "loss": 0.4657, "step": 1222 }, { "ETA": 3.6, "epoch": 0.3933108216755105, "fp16_scale": 1.0, "global_step": 1223, "grad_norm": 1.9493700217050751, "learning_rate": 1.3842472234577429e-06, "loss": 0.3968, "step": 1223 }, { "ETA": 3.6, "epoch": 0.39363241678726485, "fp16_scale": 1.0, "global_step": 1224, "grad_norm": 1.7607255605255163, "learning_rate": 1.3832850205644518e-06, "loss": 0.4337, "step": 1224 }, { "ETA": 3.6, "epoch": 0.39395401189901913, "fp16_scale": 1.0, "global_step": 1225, "grad_norm": 2.0830654554064165, "learning_rate": 1.3823224015240154e-06, "loss": 0.4527, "step": 1225 }, { "ETA": 3.6, "epoch": 0.3942756070107734, "fp16_scale": 1.0, "global_step": 1226, "grad_norm": 1.8106200568360995, "learning_rate": 1.3813593673815857e-06, "loss": 0.4932, "step": 1226 }, { "ETA": 3.59, "epoch": 0.39459720212252775, "fp16_scale": 1.0, "global_step": 1227, "grad_norm": 2.1081485927831802, "learning_rate": 1.3803959191827659e-06, "loss": 0.4035, "step": 1227 }, { "ETA": 3.59, "epoch": 0.394918797234282, "fp16_scale": 1.0, "global_step": 1228, "grad_norm": 1.8585035098425444, "learning_rate": 1.3794320579736083e-06, "loss": 0.3873, "step": 1228 }, { "ETA": 3.59, "epoch": 0.39524039234603636, "fp16_scale": 1.0, "global_step": 1229, "grad_norm": 2.0499251240730882, "learning_rate": 1.3784677848006135e-06, "loss": 0.5385, "step": 1229 }, { "ETA": 3.59, "epoch": 0.39556198745779064, "fp16_scale": 1.0, "global_step": 1230, "grad_norm": 1.9552820293742015, "learning_rate": 1.3775031007107299e-06, "loss": 0.4264, "step": 1230 }, { "ETA": 3.59, "epoch": 0.3958835825695449, "fp16_scale": 1.0, "global_step": 1231, "grad_norm": 1.7723291980659213, "learning_rate": 1.3765380067513518e-06, "loss": 0.4062, "step": 1231 }, { "ETA": 3.58, "epoch": 0.39620517768129926, "fp16_scale": 1.0, "global_step": 1232, "grad_norm": 1.9574839735840515, "learning_rate": 1.3755725039703179e-06, "loss": 0.4256, "step": 1232 }, { "ETA": 3.58, "epoch": 0.39652677279305354, "fp16_scale": 1.0, "global_step": 1233, "grad_norm": 2.274789719814242, "learning_rate": 1.374606593415912e-06, "loss": 0.4074, "step": 1233 }, { "ETA": 3.58, "epoch": 0.3968483679048078, "fp16_scale": 1.0, "global_step": 1234, "grad_norm": 1.9997290474582707, "learning_rate": 1.3736402761368594e-06, "loss": 0.3597, "step": 1234 }, { "ETA": 3.58, "epoch": 0.39716996301656216, "fp16_scale": 1.0, "global_step": 1235, "grad_norm": 2.1530252175996925, "learning_rate": 1.3726735531823286e-06, "loss": 0.5291, "step": 1235 }, { "ETA": 3.57, "epoch": 0.39749155812831644, "fp16_scale": 1.0, "global_step": 1236, "grad_norm": 1.974745618508477, "learning_rate": 1.3717064256019264e-06, "loss": 0.4301, "step": 1236 }, { "ETA": 3.57, "epoch": 0.39781315324007077, "fp16_scale": 1.0, "global_step": 1237, "grad_norm": 1.9728394120343977, "learning_rate": 1.3707388944457006e-06, "loss": 0.4078, "step": 1237 }, { "ETA": 3.57, "epoch": 0.39813474835182505, "fp16_scale": 1.0, "global_step": 1238, "grad_norm": 2.078266927630806, "learning_rate": 1.3697709607641365e-06, "loss": 0.4179, "step": 1238 }, { "ETA": 3.57, "epoch": 0.39845634346357933, "fp16_scale": 1.0, "global_step": 1239, "grad_norm": 1.9533156961389537, "learning_rate": 1.3688026256081563e-06, "loss": 0.4724, "step": 1239 }, { "ETA": 3.57, "epoch": 0.39877793857533367, "fp16_scale": 1.0, "global_step": 1240, "grad_norm": 1.844753616843298, "learning_rate": 1.3678338900291188e-06, "loss": 0.4093, "step": 1240 }, { "ETA": 3.56, "epoch": 0.39909953368708795, "fp16_scale": 1.0, "global_step": 1241, "grad_norm": 1.932700638754792, "learning_rate": 1.3668647550788172e-06, "loss": 0.4067, "step": 1241 }, { "ETA": 3.56, "epoch": 0.3994211287988423, "fp16_scale": 1.0, "global_step": 1242, "grad_norm": 1.9695963095648668, "learning_rate": 1.3658952218094779e-06, "loss": 0.5026, "step": 1242 }, { "ETA": 3.56, "epoch": 0.39974272391059656, "fp16_scale": 1.0, "global_step": 1243, "grad_norm": 1.935446986486094, "learning_rate": 1.3649252912737602e-06, "loss": 0.4253, "step": 1243 }, { "ETA": 3.56, "epoch": 0.40006431902235084, "fp16_scale": 1.0, "global_step": 1244, "grad_norm": 2.2589571761933223, "learning_rate": 1.3639549645247544e-06, "loss": 0.3581, "step": 1244 }, { "ETA": 3.55, "epoch": 0.4003859141341052, "fp16_scale": 1.0, "global_step": 1245, "grad_norm": 2.1682110677343918, "learning_rate": 1.3629842426159813e-06, "loss": 0.3833, "step": 1245 }, { "ETA": 3.55, "epoch": 0.40070750924585946, "fp16_scale": 1.0, "global_step": 1246, "grad_norm": 2.0533016479117836, "learning_rate": 1.362013126601391e-06, "loss": 0.4652, "step": 1246 }, { "ETA": 3.55, "epoch": 0.40102910435761374, "fp16_scale": 1.0, "global_step": 1247, "grad_norm": 1.916742916306892, "learning_rate": 1.3610416175353609e-06, "loss": 0.4205, "step": 1247 }, { "ETA": 3.55, "epoch": 0.4013506994693681, "fp16_scale": 1.0, "global_step": 1248, "grad_norm": 2.2265100081173816, "learning_rate": 1.3600697164726954e-06, "loss": 0.4355, "step": 1248 }, { "ETA": 3.55, "epoch": 0.40167229458112236, "fp16_scale": 1.0, "global_step": 1249, "grad_norm": 2.05052390622889, "learning_rate": 1.3590974244686246e-06, "loss": 0.3994, "step": 1249 }, { "ETA": 3.55, "epoch": 0.4019938896928767, "fp16_scale": 1.0, "global_step": 1250, "grad_norm": 2.1337818401174067, "learning_rate": 1.358124742578803e-06, "loss": 0.4115, "step": 1250 }, { "ETA": 3.54, "epoch": 0.402315484804631, "fp16_scale": 1.0, "global_step": 1251, "grad_norm": 1.926233045970569, "learning_rate": 1.3571516718593078e-06, "loss": 0.4138, "step": 1251 }, { "ETA": 3.54, "epoch": 0.40263707991638525, "fp16_scale": 1.0, "global_step": 1252, "grad_norm": 1.8012746034968832, "learning_rate": 1.3561782133666396e-06, "loss": 0.4444, "step": 1252 }, { "ETA": 3.54, "epoch": 0.4029586750281396, "fp16_scale": 1.0, "global_step": 1253, "grad_norm": 1.8893945173687816, "learning_rate": 1.355204368157719e-06, "loss": 0.4106, "step": 1253 }, { "ETA": 3.54, "epoch": 0.40328027013989387, "fp16_scale": 1.0, "global_step": 1254, "grad_norm": 1.8259930391833834, "learning_rate": 1.354230137289887e-06, "loss": 0.3353, "step": 1254 }, { "ETA": 3.53, "epoch": 0.40360186525164815, "fp16_scale": 1.0, "global_step": 1255, "grad_norm": 2.1136932321072717, "learning_rate": 1.3532555218209034e-06, "loss": 0.3952, "step": 1255 }, { "ETA": 3.53, "epoch": 0.4039234603634025, "fp16_scale": 1.0, "global_step": 1256, "grad_norm": 2.078439130562752, "learning_rate": 1.3522805228089449e-06, "loss": 0.424, "step": 1256 }, { "ETA": 3.53, "epoch": 0.40424505547515677, "fp16_scale": 1.0, "global_step": 1257, "grad_norm": 2.1196724619474865, "learning_rate": 1.3513051413126051e-06, "loss": 0.4363, "step": 1257 }, { "ETA": 3.53, "epoch": 0.4045666505869111, "fp16_scale": 1.0, "global_step": 1258, "grad_norm": 1.9350173128719281, "learning_rate": 1.350329378390893e-06, "loss": 0.4015, "step": 1258 }, { "ETA": 3.52, "epoch": 0.4048882456986654, "fp16_scale": 1.0, "global_step": 1259, "grad_norm": 2.0707852214265996, "learning_rate": 1.3493532351032317e-06, "loss": 0.3762, "step": 1259 }, { "ETA": 3.52, "epoch": 0.40520984081041966, "fp16_scale": 1.0, "global_step": 1260, "grad_norm": 2.269699660425376, "learning_rate": 1.3483767125094572e-06, "loss": 0.3905, "step": 1260 }, { "ETA": 3.52, "epoch": 0.405531435922174, "fp16_scale": 1.0, "global_step": 1261, "grad_norm": 2.003311715529416, "learning_rate": 1.3473998116698168e-06, "loss": 0.4096, "step": 1261 }, { "ETA": 3.52, "epoch": 0.4058530310339283, "fp16_scale": 1.0, "global_step": 1262, "grad_norm": 1.9431104363624927, "learning_rate": 1.3464225336449693e-06, "loss": 0.4847, "step": 1262 }, { "ETA": 3.52, "epoch": 0.40617462614568256, "fp16_scale": 1.0, "global_step": 1263, "grad_norm": 1.9462633816271575, "learning_rate": 1.3454448794959826e-06, "loss": 0.4594, "step": 1263 }, { "ETA": 3.51, "epoch": 0.4064962212574369, "fp16_scale": 1.0, "global_step": 1264, "grad_norm": 2.07051637480553, "learning_rate": 1.344466850284333e-06, "loss": 0.3757, "step": 1264 }, { "ETA": 3.51, "epoch": 0.4068178163691912, "fp16_scale": 1.0, "global_step": 1265, "grad_norm": 2.2664569035479993, "learning_rate": 1.3434884470719038e-06, "loss": 0.4937, "step": 1265 }, { "ETA": 3.51, "epoch": 0.4071394114809455, "fp16_scale": 1.0, "global_step": 1266, "grad_norm": 2.016763153913431, "learning_rate": 1.3425096709209855e-06, "loss": 0.4492, "step": 1266 }, { "ETA": 3.51, "epoch": 0.4074610065926998, "fp16_scale": 1.0, "global_step": 1267, "grad_norm": 2.4052542025182904, "learning_rate": 1.3415305228942717e-06, "loss": 0.4578, "step": 1267 }, { "ETA": 3.51, "epoch": 0.4077826017044541, "fp16_scale": 1.0, "global_step": 1268, "grad_norm": 2.055440757308952, "learning_rate": 1.3405510040548611e-06, "loss": 0.4202, "step": 1268 }, { "ETA": 3.5, "epoch": 0.4081041968162084, "fp16_scale": 1.0, "global_step": 1269, "grad_norm": 1.7581727936551301, "learning_rate": 1.3395711154662548e-06, "loss": 0.4111, "step": 1269 }, { "ETA": 3.5, "epoch": 0.4084257919279627, "fp16_scale": 1.0, "global_step": 1270, "grad_norm": 2.143366393966735, "learning_rate": 1.3385908581923547e-06, "loss": 0.3887, "step": 1270 }, { "ETA": 3.5, "epoch": 0.40874738703971697, "fp16_scale": 1.0, "global_step": 1271, "grad_norm": 1.9717120060580338, "learning_rate": 1.3376102332974639e-06, "loss": 0.3799, "step": 1271 }, { "ETA": 3.5, "epoch": 0.4090689821514713, "fp16_scale": 1.0, "global_step": 1272, "grad_norm": 2.33211450756805, "learning_rate": 1.336629241846284e-06, "loss": 0.4056, "step": 1272 }, { "ETA": 3.49, "epoch": 0.4093905772632256, "fp16_scale": 1.0, "global_step": 1273, "grad_norm": 1.8909583337438918, "learning_rate": 1.3356478849039148e-06, "loss": 0.38, "step": 1273 }, { "ETA": 3.49, "epoch": 0.4097121723749799, "fp16_scale": 1.0, "global_step": 1274, "grad_norm": 2.1144769906691754, "learning_rate": 1.334666163535853e-06, "loss": 0.4052, "step": 1274 }, { "ETA": 3.49, "epoch": 0.4100337674867342, "fp16_scale": 1.0, "global_step": 1275, "grad_norm": 1.8883487851099277, "learning_rate": 1.3336840788079905e-06, "loss": 0.4357, "step": 1275 }, { "ETA": 3.49, "epoch": 0.4103553625984885, "fp16_scale": 1.0, "global_step": 1276, "grad_norm": 1.8964786960646427, "learning_rate": 1.3327016317866146e-06, "loss": 0.4628, "step": 1276 }, { "ETA": 3.49, "epoch": 0.4106769577102428, "fp16_scale": 1.0, "global_step": 1277, "grad_norm": 1.8450898049718576, "learning_rate": 1.3317188235384051e-06, "loss": 0.3882, "step": 1277 }, { "ETA": 3.48, "epoch": 0.4109985528219971, "fp16_scale": 1.0, "global_step": 1278, "grad_norm": 2.0586266436988576, "learning_rate": 1.3307356551304347e-06, "loss": 0.4246, "step": 1278 }, { "ETA": 3.48, "epoch": 0.41132014793375143, "fp16_scale": 1.0, "global_step": 1279, "grad_norm": 2.0311311801760064, "learning_rate": 1.3297521276301665e-06, "loss": 0.4669, "step": 1279 }, { "ETA": 3.48, "epoch": 0.4116417430455057, "fp16_scale": 1.0, "global_step": 1280, "grad_norm": 2.1469580929284033, "learning_rate": 1.3287682421054538e-06, "loss": 0.3494, "step": 1280 }, { "ETA": 3.48, "epoch": 0.41196333815726, "fp16_scale": 1.0, "global_step": 1281, "grad_norm": 2.1130366516356567, "learning_rate": 1.3277839996245387e-06, "loss": 0.4841, "step": 1281 }, { "ETA": 3.47, "epoch": 0.41228493326901433, "fp16_scale": 1.0, "global_step": 1282, "grad_norm": 2.1267466288125245, "learning_rate": 1.3267994012560504e-06, "loss": 0.4249, "step": 1282 }, { "ETA": 3.47, "epoch": 0.4126065283807686, "fp16_scale": 1.0, "global_step": 1283, "grad_norm": 2.036250364245317, "learning_rate": 1.3258144480690054e-06, "loss": 0.4517, "step": 1283 }, { "ETA": 3.47, "epoch": 0.4129281234925229, "fp16_scale": 1.0, "global_step": 1284, "grad_norm": 2.038258782557574, "learning_rate": 1.3248291411328045e-06, "loss": 0.5058, "step": 1284 }, { "ETA": 3.47, "epoch": 0.4132497186042772, "fp16_scale": 1.0, "global_step": 1285, "grad_norm": 2.158157978740966, "learning_rate": 1.3238434815172333e-06, "loss": 0.4319, "step": 1285 }, { "ETA": 3.47, "epoch": 0.4135713137160315, "fp16_scale": 1.0, "global_step": 1286, "grad_norm": 1.8677714276173643, "learning_rate": 1.3228574702924595e-06, "loss": 0.4284, "step": 1286 }, { "ETA": 3.47, "epoch": 0.41389290882778584, "fp16_scale": 1.0, "global_step": 1287, "grad_norm": 1.891794993060621, "learning_rate": 1.3218711085290333e-06, "loss": 0.4004, "step": 1287 }, { "ETA": 3.46, "epoch": 0.4142145039395401, "fp16_scale": 1.0, "global_step": 1288, "grad_norm": 1.9120263262061623, "learning_rate": 1.3208843972978854e-06, "loss": 0.4944, "step": 1288 }, { "ETA": 3.46, "epoch": 0.4145360990512944, "fp16_scale": 1.0, "global_step": 1289, "grad_norm": 1.8925294094801386, "learning_rate": 1.3198973376703256e-06, "loss": 0.3965, "step": 1289 }, { "ETA": 3.46, "epoch": 0.41485769416304874, "fp16_scale": 1.0, "global_step": 1290, "grad_norm": 1.9064706308968085, "learning_rate": 1.3189099307180421e-06, "loss": 0.4539, "step": 1290 }, { "ETA": 3.46, "epoch": 0.415179289274803, "fp16_scale": 1.0, "global_step": 1291, "grad_norm": 1.8373911459489973, "learning_rate": 1.3179221775131004e-06, "loss": 0.402, "step": 1291 }, { "ETA": 3.46, "epoch": 0.4155008843865573, "fp16_scale": 1.0, "global_step": 1292, "grad_norm": 1.9135215511363823, "learning_rate": 1.3169340791279413e-06, "loss": 0.3875, "step": 1292 }, { "ETA": 3.45, "epoch": 0.41582247949831164, "fp16_scale": 1.0, "global_step": 1293, "grad_norm": 1.9368908834871557, "learning_rate": 1.3159456366353814e-06, "loss": 0.4253, "step": 1293 }, { "ETA": 3.45, "epoch": 0.4161440746100659, "fp16_scale": 1.0, "global_step": 1294, "grad_norm": 1.9648459069380593, "learning_rate": 1.3149568511086101e-06, "loss": 0.4024, "step": 1294 }, { "ETA": 3.45, "epoch": 0.41646566972182025, "fp16_scale": 1.0, "global_step": 1295, "grad_norm": 2.479224124944344, "learning_rate": 1.31396772362119e-06, "loss": 0.4545, "step": 1295 }, { "ETA": 3.45, "epoch": 0.41678726483357453, "fp16_scale": 1.0, "global_step": 1296, "grad_norm": 1.8852621192902068, "learning_rate": 1.3129782552470536e-06, "loss": 0.4347, "step": 1296 }, { "ETA": 3.45, "epoch": 0.4171088599453288, "fp16_scale": 1.0, "global_step": 1297, "grad_norm": 1.6736510366199835, "learning_rate": 1.311988447060505e-06, "loss": 0.4099, "step": 1297 }, { "ETA": 3.45, "epoch": 0.41743045505708315, "fp16_scale": 1.0, "global_step": 1298, "grad_norm": 1.856676771155561, "learning_rate": 1.3109983001362165e-06, "loss": 0.4255, "step": 1298 }, { "ETA": 3.44, "epoch": 0.41775205016883743, "fp16_scale": 1.0, "global_step": 1299, "grad_norm": 2.023042928084088, "learning_rate": 1.3100078155492283e-06, "loss": 0.3876, "step": 1299 }, { "ETA": 3.44, "epoch": 0.4180736452805917, "fp16_scale": 1.0, "global_step": 1300, "grad_norm": 1.8877427544955612, "learning_rate": 1.3090169943749473e-06, "loss": 0.3698, "step": 1300 }, { "ETA": 3.44, "epoch": 0.41839524039234605, "fp16_scale": 1.0, "global_step": 1301, "grad_norm": 1.873112217728738, "learning_rate": 1.308025837689146e-06, "loss": 0.4068, "step": 1301 }, { "ETA": 3.44, "epoch": 0.4187168355041003, "fp16_scale": 1.0, "global_step": 1302, "grad_norm": 2.051326993625414, "learning_rate": 1.3070343465679606e-06, "loss": 0.4576, "step": 1302 }, { "ETA": 3.43, "epoch": 0.41903843061585466, "fp16_scale": 1.0, "global_step": 1303, "grad_norm": 2.0243280016873584, "learning_rate": 1.3060425220878908e-06, "loss": 0.4097, "step": 1303 }, { "ETA": 3.43, "epoch": 0.41936002572760894, "fp16_scale": 1.0, "global_step": 1304, "grad_norm": 2.39146033148339, "learning_rate": 1.305050365325798e-06, "loss": 0.363, "step": 1304 }, { "ETA": 3.43, "epoch": 0.4196816208393632, "fp16_scale": 1.0, "global_step": 1305, "grad_norm": 1.9115903336157114, "learning_rate": 1.3040578773589056e-06, "loss": 0.4096, "step": 1305 }, { "ETA": 3.43, "epoch": 0.42000321595111756, "fp16_scale": 1.0, "global_step": 1306, "grad_norm": 2.237936064048709, "learning_rate": 1.3030650592647944e-06, "loss": 0.3962, "step": 1306 }, { "ETA": 3.43, "epoch": 0.42032481106287184, "fp16_scale": 1.0, "global_step": 1307, "grad_norm": 1.956406833792594, "learning_rate": 1.3020719121214054e-06, "loss": 0.4493, "step": 1307 }, { "ETA": 3.42, "epoch": 0.4206464061746261, "fp16_scale": 1.0, "global_step": 1308, "grad_norm": 2.053709038298677, "learning_rate": 1.3010784370070365e-06, "loss": 0.4337, "step": 1308 }, { "ETA": 3.42, "epoch": 0.42096800128638046, "fp16_scale": 1.0, "global_step": 1309, "grad_norm": 1.9529762868034433, "learning_rate": 1.3000846350003406e-06, "loss": 0.4373, "step": 1309 }, { "ETA": 3.42, "epoch": 0.42128959639813474, "fp16_scale": 1.0, "global_step": 1310, "grad_norm": 1.7562374110675865, "learning_rate": 1.2990905071803273e-06, "loss": 0.3747, "step": 1310 }, { "ETA": 3.42, "epoch": 0.42161119150988907, "fp16_scale": 1.0, "global_step": 1311, "grad_norm": 1.7062053296457569, "learning_rate": 1.2980960546263586e-06, "loss": 0.4122, "step": 1311 }, { "ETA": 3.41, "epoch": 0.42193278662164335, "fp16_scale": 1.0, "global_step": 1312, "grad_norm": 1.8269535633366947, "learning_rate": 1.2971012784181497e-06, "loss": 0.3574, "step": 1312 }, { "ETA": 3.41, "epoch": 0.42225438173339763, "fp16_scale": 1.0, "global_step": 1313, "grad_norm": 1.8215195588670925, "learning_rate": 1.296106179635767e-06, "loss": 0.4707, "step": 1313 }, { "ETA": 3.41, "epoch": 0.42257597684515197, "fp16_scale": 1.0, "global_step": 1314, "grad_norm": 1.7891525419939078, "learning_rate": 1.295110759359627e-06, "loss": 0.3745, "step": 1314 }, { "ETA": 3.41, "epoch": 0.42289757195690625, "fp16_scale": 1.0, "global_step": 1315, "grad_norm": 1.6739666894383969, "learning_rate": 1.2941150186704953e-06, "loss": 0.4538, "step": 1315 }, { "ETA": 3.41, "epoch": 0.42321916706866053, "fp16_scale": 1.0, "global_step": 1316, "grad_norm": 1.879052796438246, "learning_rate": 1.2931189586494857e-06, "loss": 0.4704, "step": 1316 }, { "ETA": 3.4, "epoch": 0.42354076218041486, "fp16_scale": 1.0, "global_step": 1317, "grad_norm": 2.075420152410664, "learning_rate": 1.2921225803780586e-06, "loss": 0.4354, "step": 1317 }, { "ETA": 3.4, "epoch": 0.42386235729216915, "fp16_scale": 1.0, "global_step": 1318, "grad_norm": 1.9809921057553515, "learning_rate": 1.2911258849380198e-06, "loss": 0.4578, "step": 1318 }, { "ETA": 3.4, "epoch": 0.4241839524039235, "fp16_scale": 1.0, "global_step": 1319, "grad_norm": 1.9549169653914598, "learning_rate": 1.2901288734115193e-06, "loss": 0.3984, "step": 1319 }, { "ETA": 3.4, "epoch": 0.42450554751567776, "fp16_scale": 1.0, "global_step": 1320, "grad_norm": 1.8708757161316962, "learning_rate": 1.2891315468810504e-06, "loss": 0.3808, "step": 1320 }, { "ETA": 3.4, "epoch": 0.42482714262743204, "fp16_scale": 1.0, "global_step": 1321, "grad_norm": 2.0241645347316615, "learning_rate": 1.288133906429449e-06, "loss": 0.3758, "step": 1321 }, { "ETA": 3.4, "epoch": 0.4251487377391864, "fp16_scale": 1.0, "global_step": 1322, "grad_norm": 2.030835180450959, "learning_rate": 1.2871359531398909e-06, "loss": 0.4313, "step": 1322 }, { "ETA": 3.39, "epoch": 0.42547033285094066, "fp16_scale": 1.0, "global_step": 1323, "grad_norm": 2.041273747447046, "learning_rate": 1.2861376880958922e-06, "loss": 0.3822, "step": 1323 }, { "ETA": 3.39, "epoch": 0.425791927962695, "fp16_scale": 1.0, "global_step": 1324, "grad_norm": 1.9113739253795483, "learning_rate": 1.2851391123813073e-06, "loss": 0.4825, "step": 1324 }, { "ETA": 3.39, "epoch": 0.4261135230744493, "fp16_scale": 1.0, "global_step": 1325, "grad_norm": 1.9341991945682349, "learning_rate": 1.2841402270803276e-06, "loss": 0.5072, "step": 1325 }, { "ETA": 3.39, "epoch": 0.42643511818620355, "fp16_scale": 1.0, "global_step": 1326, "grad_norm": 2.098332183595364, "learning_rate": 1.283141033277481e-06, "loss": 0.4842, "step": 1326 }, { "ETA": 3.39, "epoch": 0.4267567132979579, "fp16_scale": 1.0, "global_step": 1327, "grad_norm": 1.8718407825339272, "learning_rate": 1.2821415320576307e-06, "loss": 0.4722, "step": 1327 }, { "ETA": 3.39, "epoch": 0.42707830840971217, "fp16_scale": 1.0, "global_step": 1328, "grad_norm": 1.8864295150142696, "learning_rate": 1.2811417245059729e-06, "loss": 0.5018, "step": 1328 }, { "ETA": 3.38, "epoch": 0.42739990352146645, "fp16_scale": 1.0, "global_step": 1329, "grad_norm": 1.9084232296496746, "learning_rate": 1.2801416117080364e-06, "loss": 0.4172, "step": 1329 }, { "ETA": 3.38, "epoch": 0.4277214986332208, "fp16_scale": 1.0, "global_step": 1330, "grad_norm": 1.9012788737997843, "learning_rate": 1.2791411947496827e-06, "loss": 0.4131, "step": 1330 }, { "ETA": 3.38, "epoch": 0.42804309374497507, "fp16_scale": 1.0, "global_step": 1331, "grad_norm": 2.1323509076818334, "learning_rate": 1.2781404747171016e-06, "loss": 0.4389, "step": 1331 }, { "ETA": 3.38, "epoch": 0.4283646888567294, "fp16_scale": 1.0, "global_step": 1332, "grad_norm": 2.2654401958205197, "learning_rate": 1.2771394526968133e-06, "loss": 0.4359, "step": 1332 }, { "ETA": 3.38, "epoch": 0.4286862839684837, "fp16_scale": 1.0, "global_step": 1333, "grad_norm": 1.7762420788935014, "learning_rate": 1.2761381297756658e-06, "loss": 0.422, "step": 1333 }, { "ETA": 3.38, "epoch": 0.42900787908023796, "fp16_scale": 1.0, "global_step": 1334, "grad_norm": 2.0063885492786806, "learning_rate": 1.2751365070408334e-06, "loss": 0.4716, "step": 1334 }, { "ETA": 3.37, "epoch": 0.4293294741919923, "fp16_scale": 1.0, "global_step": 1335, "grad_norm": 2.1038221381344164, "learning_rate": 1.2741345855798159e-06, "loss": 0.4982, "step": 1335 }, { "ETA": 3.37, "epoch": 0.4296510693037466, "fp16_scale": 1.0, "global_step": 1336, "grad_norm": 1.95117683758303, "learning_rate": 1.273132366480438e-06, "loss": 0.4467, "step": 1336 }, { "ETA": 3.37, "epoch": 0.42997266441550086, "fp16_scale": 1.0, "global_step": 1337, "grad_norm": 2.0886909752284852, "learning_rate": 1.2721298508308463e-06, "loss": 0.4383, "step": 1337 }, { "ETA": 3.37, "epoch": 0.4302942595272552, "fp16_scale": 1.0, "global_step": 1338, "grad_norm": 1.961099516106727, "learning_rate": 1.271127039719511e-06, "loss": 0.477, "step": 1338 }, { "ETA": 3.37, "epoch": 0.4306158546390095, "fp16_scale": 1.0, "global_step": 1339, "grad_norm": 2.32048606376842, "learning_rate": 1.270123934235222e-06, "loss": 0.4757, "step": 1339 }, { "ETA": 3.37, "epoch": 0.4309374497507638, "fp16_scale": 1.0, "global_step": 1340, "grad_norm": 2.011952545459391, "learning_rate": 1.2691205354670892e-06, "loss": 0.4333, "step": 1340 }, { "ETA": 3.36, "epoch": 0.4312590448625181, "fp16_scale": 1.0, "global_step": 1341, "grad_norm": 2.1176101531453697, "learning_rate": 1.2681168445045412e-06, "loss": 0.3868, "step": 1341 }, { "ETA": 3.36, "epoch": 0.4315806399742724, "fp16_scale": 1.0, "global_step": 1342, "grad_norm": 2.0411025678349266, "learning_rate": 1.2671128624373229e-06, "loss": 0.4032, "step": 1342 }, { "ETA": 3.36, "epoch": 0.4319022350860267, "fp16_scale": 1.0, "global_step": 1343, "grad_norm": 2.0078757906643276, "learning_rate": 1.2661085903554962e-06, "loss": 0.3884, "step": 1343 }, { "ETA": 3.36, "epoch": 0.432223830197781, "fp16_scale": 1.0, "global_step": 1344, "grad_norm": 1.961133658667528, "learning_rate": 1.2651040293494376e-06, "loss": 0.4223, "step": 1344 }, { "ETA": 3.35, "epoch": 0.43254542530953527, "fp16_scale": 1.0, "global_step": 1345, "grad_norm": 1.975805112272552, "learning_rate": 1.2640991805098366e-06, "loss": 0.4567, "step": 1345 }, { "ETA": 3.35, "epoch": 0.4328670204212896, "fp16_scale": 1.0, "global_step": 1346, "grad_norm": 2.1325255765239355, "learning_rate": 1.2630940449276968e-06, "loss": 0.5047, "step": 1346 }, { "ETA": 3.35, "epoch": 0.4331886155330439, "fp16_scale": 1.0, "global_step": 1347, "grad_norm": 2.0655733195455377, "learning_rate": 1.262088623694332e-06, "loss": 0.5378, "step": 1347 }, { "ETA": 3.35, "epoch": 0.4335102106447982, "fp16_scale": 1.0, "global_step": 1348, "grad_norm": 2.135734443396966, "learning_rate": 1.2610829179013657e-06, "loss": 0.4077, "step": 1348 }, { "ETA": 3.35, "epoch": 0.4338318057565525, "fp16_scale": 1.0, "global_step": 1349, "grad_norm": 2.1472325469193976, "learning_rate": 1.2600769286407317e-06, "loss": 0.3683, "step": 1349 }, { "ETA": 3.34, "epoch": 0.4341534008683068, "fp16_scale": 1.0, "global_step": 1350, "grad_norm": 1.8851290970225403, "learning_rate": 1.2590706570046704e-06, "loss": 0.3661, "step": 1350 }, { "ETA": 3.34, "epoch": 0.4344749959800611, "fp16_scale": 1.0, "global_step": 1351, "grad_norm": 2.1279825620137616, "learning_rate": 1.2580641040857294e-06, "loss": 0.426, "step": 1351 }, { "ETA": 3.34, "epoch": 0.4347965910918154, "fp16_scale": 1.0, "global_step": 1352, "grad_norm": 1.8516315571203608, "learning_rate": 1.2570572709767616e-06, "loss": 0.4227, "step": 1352 }, { "ETA": 3.34, "epoch": 0.4351181862035697, "fp16_scale": 1.0, "global_step": 1353, "grad_norm": 1.8200937623381561, "learning_rate": 1.2560501587709238e-06, "loss": 0.3648, "step": 1353 }, { "ETA": 3.34, "epoch": 0.435439781315324, "fp16_scale": 1.0, "global_step": 1354, "grad_norm": 2.0206700029119955, "learning_rate": 1.2550427685616764e-06, "loss": 0.3636, "step": 1354 }, { "ETA": 3.33, "epoch": 0.4357613764270783, "fp16_scale": 1.0, "global_step": 1355, "grad_norm": 1.9238979778464935, "learning_rate": 1.2540351014427813e-06, "loss": 0.4101, "step": 1355 }, { "ETA": 3.33, "epoch": 0.43608297153883263, "fp16_scale": 1.0, "global_step": 1356, "grad_norm": 2.278431500277815, "learning_rate": 1.2530271585083006e-06, "loss": 0.4308, "step": 1356 }, { "ETA": 3.33, "epoch": 0.4364045666505869, "fp16_scale": 1.0, "global_step": 1357, "grad_norm": 1.9638468459383565, "learning_rate": 1.252018940852597e-06, "loss": 0.492, "step": 1357 }, { "ETA": 3.33, "epoch": 0.4367261617623412, "fp16_scale": 1.0, "global_step": 1358, "grad_norm": 2.0124603726439796, "learning_rate": 1.2510104495703304e-06, "loss": 0.476, "step": 1358 }, { "ETA": 3.33, "epoch": 0.4370477568740955, "fp16_scale": 1.0, "global_step": 1359, "grad_norm": 1.968831758094997, "learning_rate": 1.2500016857564584e-06, "loss": 0.4057, "step": 1359 }, { "ETA": 3.32, "epoch": 0.4373693519858498, "fp16_scale": 1.0, "global_step": 1360, "grad_norm": 2.197815710520745, "learning_rate": 1.248992650506234e-06, "loss": 0.4393, "step": 1360 }, { "ETA": 3.32, "epoch": 0.43769094709760414, "fp16_scale": 1.0, "global_step": 1361, "grad_norm": 2.1321524386912087, "learning_rate": 1.2479833449152054e-06, "loss": 0.4704, "step": 1361 }, { "ETA": 3.32, "epoch": 0.4380125422093584, "fp16_scale": 1.0, "global_step": 1362, "grad_norm": 1.832170751052816, "learning_rate": 1.2469737700792143e-06, "loss": 0.4231, "step": 1362 }, { "ETA": 3.32, "epoch": 0.4383341373211127, "fp16_scale": 1.0, "global_step": 1363, "grad_norm": 2.2259342027789617, "learning_rate": 1.2459639270943943e-06, "loss": 0.3483, "step": 1363 }, { "ETA": 3.32, "epoch": 0.43865573243286704, "fp16_scale": 1.0, "global_step": 1364, "grad_norm": 1.8654567434816682, "learning_rate": 1.2449538170571705e-06, "loss": 0.5051, "step": 1364 }, { "ETA": 3.31, "epoch": 0.4389773275446213, "fp16_scale": 1.0, "global_step": 1365, "grad_norm": 2.201363172831595, "learning_rate": 1.2439434410642578e-06, "loss": 0.477, "step": 1365 }, { "ETA": 3.31, "epoch": 0.4392989226563756, "fp16_scale": 1.0, "global_step": 1366, "grad_norm": 2.1914787785831225, "learning_rate": 1.2429328002126599e-06, "loss": 0.4851, "step": 1366 }, { "ETA": 3.31, "epoch": 0.43962051776812994, "fp16_scale": 1.0, "global_step": 1367, "grad_norm": 2.1813556026033973, "learning_rate": 1.2419218955996676e-06, "loss": 0.4766, "step": 1367 }, { "ETA": 3.31, "epoch": 0.4399421128798842, "fp16_scale": 1.0, "global_step": 1368, "grad_norm": 1.9518526521459842, "learning_rate": 1.2409107283228595e-06, "loss": 0.4245, "step": 1368 }, { "ETA": 3.31, "epoch": 0.44026370799163855, "fp16_scale": 1.0, "global_step": 1369, "grad_norm": 2.1292985482112, "learning_rate": 1.239899299480098e-06, "loss": 0.4366, "step": 1369 }, { "ETA": 3.3, "epoch": 0.44058530310339283, "fp16_scale": 1.0, "global_step": 1370, "grad_norm": 1.8007064142102165, "learning_rate": 1.2388876101695293e-06, "loss": 0.4136, "step": 1370 }, { "ETA": 3.3, "epoch": 0.4409068982151471, "fp16_scale": 1.0, "global_step": 1371, "grad_norm": 1.8308364506958799, "learning_rate": 1.2378756614895841e-06, "loss": 0.3975, "step": 1371 }, { "ETA": 3.3, "epoch": 0.44122849332690145, "fp16_scale": 1.0, "global_step": 1372, "grad_norm": 2.030996011863319, "learning_rate": 1.2368634545389732e-06, "loss": 0.3726, "step": 1372 }, { "ETA": 3.3, "epoch": 0.44155008843865573, "fp16_scale": 1.0, "global_step": 1373, "grad_norm": 2.1848106243679775, "learning_rate": 1.2358509904166875e-06, "loss": 0.3896, "step": 1373 }, { "ETA": 3.3, "epoch": 0.44187168355041, "fp16_scale": 1.0, "global_step": 1374, "grad_norm": 2.1326047482073025, "learning_rate": 1.2348382702219988e-06, "loss": 0.5802, "step": 1374 }, { "ETA": 3.29, "epoch": 0.44219327866216435, "fp16_scale": 1.0, "global_step": 1375, "grad_norm": 1.8792653832315627, "learning_rate": 1.2338252950544559e-06, "loss": 0.3792, "step": 1375 }, { "ETA": 3.29, "epoch": 0.4425148737739186, "fp16_scale": 1.0, "global_step": 1376, "grad_norm": 1.8910372647492621, "learning_rate": 1.2328120660138842e-06, "loss": 0.4238, "step": 1376 }, { "ETA": 3.29, "epoch": 0.44283646888567296, "fp16_scale": 1.0, "global_step": 1377, "grad_norm": 1.9051361184992184, "learning_rate": 1.2317985842003849e-06, "loss": 0.3798, "step": 1377 }, { "ETA": 3.29, "epoch": 0.44315806399742724, "fp16_scale": 1.0, "global_step": 1378, "grad_norm": 1.8431495442965877, "learning_rate": 1.2307848507143338e-06, "loss": 0.4537, "step": 1378 }, { "ETA": 3.28, "epoch": 0.4434796591091815, "fp16_scale": 1.0, "global_step": 1379, "grad_norm": 2.224381994362364, "learning_rate": 1.229770866656381e-06, "loss": 0.4025, "step": 1379 }, { "ETA": 3.28, "epoch": 0.44380125422093586, "fp16_scale": 1.0, "global_step": 1380, "grad_norm": 1.9056671272062469, "learning_rate": 1.2287566331274464e-06, "loss": 0.4767, "step": 1380 }, { "ETA": 3.28, "epoch": 0.44412284933269014, "fp16_scale": 1.0, "global_step": 1381, "grad_norm": 1.857981416086925, "learning_rate": 1.2277421512287224e-06, "loss": 0.3869, "step": 1381 }, { "ETA": 3.28, "epoch": 0.4444444444444444, "fp16_scale": 1.0, "global_step": 1382, "grad_norm": 2.016952793287992, "learning_rate": 1.2267274220616708e-06, "loss": 0.4624, "step": 1382 }, { "ETA": 3.28, "epoch": 0.44476603955619876, "fp16_scale": 1.0, "global_step": 1383, "grad_norm": 2.2381982451702114, "learning_rate": 1.2257124467280214e-06, "loss": 0.4949, "step": 1383 }, { "ETA": 3.28, "epoch": 0.44508763466795304, "fp16_scale": 1.0, "global_step": 1384, "grad_norm": 1.8812941469093638, "learning_rate": 1.2246972263297717e-06, "loss": 0.4395, "step": 1384 }, { "ETA": 3.27, "epoch": 0.44540922977970737, "fp16_scale": 1.0, "global_step": 1385, "grad_norm": 1.8286698683678104, "learning_rate": 1.2236817619691853e-06, "loss": 0.416, "step": 1385 }, { "ETA": 3.27, "epoch": 0.44573082489146165, "fp16_scale": 1.0, "global_step": 1386, "grad_norm": 2.0179511134804606, "learning_rate": 1.2226660547487903e-06, "loss": 0.4194, "step": 1386 }, { "ETA": 3.27, "epoch": 0.44605242000321593, "fp16_scale": 1.0, "global_step": 1387, "grad_norm": 1.9439917510050146, "learning_rate": 1.2216501057713788e-06, "loss": 0.4202, "step": 1387 }, { "ETA": 3.27, "epoch": 0.44637401511497027, "fp16_scale": 1.0, "global_step": 1388, "grad_norm": 1.9922919677500652, "learning_rate": 1.2206339161400057e-06, "loss": 0.4109, "step": 1388 }, { "ETA": 3.27, "epoch": 0.44669561022672455, "fp16_scale": 1.0, "global_step": 1389, "grad_norm": 1.8899473270409248, "learning_rate": 1.219617486957986e-06, "loss": 0.4214, "step": 1389 }, { "ETA": 3.26, "epoch": 0.44701720533847883, "fp16_scale": 1.0, "global_step": 1390, "grad_norm": 1.940836601527028, "learning_rate": 1.218600819328896e-06, "loss": 0.4491, "step": 1390 }, { "ETA": 3.26, "epoch": 0.44733880045023316, "fp16_scale": 1.0, "global_step": 1391, "grad_norm": 2.0663114680502717, "learning_rate": 1.2175839143565707e-06, "loss": 0.3891, "step": 1391 }, { "ETA": 3.26, "epoch": 0.44766039556198745, "fp16_scale": 1.0, "global_step": 1392, "grad_norm": 1.9132479197270338, "learning_rate": 1.2165667731451024e-06, "loss": 0.5532, "step": 1392 }, { "ETA": 3.26, "epoch": 0.4479819906737418, "fp16_scale": 1.0, "global_step": 1393, "grad_norm": 1.6813844363293866, "learning_rate": 1.2155493967988394e-06, "loss": 0.3854, "step": 1393 }, { "ETA": 3.26, "epoch": 0.44830358578549606, "fp16_scale": 1.0, "global_step": 1394, "grad_norm": 1.7477356715026746, "learning_rate": 1.2145317864223873e-06, "loss": 0.3777, "step": 1394 }, { "ETA": 3.26, "epoch": 0.44862518089725034, "fp16_scale": 1.0, "global_step": 1395, "grad_norm": 2.1168924114259022, "learning_rate": 1.2135139431206029e-06, "loss": 0.4345, "step": 1395 }, { "ETA": 3.25, "epoch": 0.4489467760090047, "fp16_scale": 1.0, "global_step": 1396, "grad_norm": 2.285953739879689, "learning_rate": 1.2124958679985987e-06, "loss": 0.4351, "step": 1396 }, { "ETA": 3.25, "epoch": 0.44926837112075896, "fp16_scale": 1.0, "global_step": 1397, "grad_norm": 2.168364924134388, "learning_rate": 1.211477562161737e-06, "loss": 0.3574, "step": 1397 }, { "ETA": 3.25, "epoch": 0.44958996623251324, "fp16_scale": 1.0, "global_step": 1398, "grad_norm": 1.8445238015044143, "learning_rate": 1.2104590267156312e-06, "loss": 0.4202, "step": 1398 }, { "ETA": 3.25, "epoch": 0.4499115613442676, "fp16_scale": 1.0, "global_step": 1399, "grad_norm": 1.8589049162779738, "learning_rate": 1.2094402627661446e-06, "loss": 0.4862, "step": 1399 }, { "ETA": 3.25, "epoch": 0.45023315645602185, "fp16_scale": 1.0, "global_step": 1400, "grad_norm": 1.925609416627876, "learning_rate": 1.2084212714193873e-06, "loss": 0.4691, "step": 1400 }, { "ETA": 3.26, "epoch": 0.4505547515677762, "fp16_scale": 1.0, "global_step": 1401, "grad_norm": 1.9467422776423138, "learning_rate": 1.2074020537817174e-06, "loss": 0.4251, "step": 1401 }, { "ETA": 3.26, "epoch": 0.45087634667953047, "fp16_scale": 1.0, "global_step": 1402, "grad_norm": 2.0494031390059955, "learning_rate": 1.2063826109597381e-06, "loss": 0.4947, "step": 1402 }, { "ETA": 3.25, "epoch": 0.45119794179128475, "fp16_scale": 1.0, "global_step": 1403, "grad_norm": 1.8729328694911644, "learning_rate": 1.2053629440602977e-06, "loss": 0.4429, "step": 1403 }, { "ETA": 3.25, "epoch": 0.4515195369030391, "fp16_scale": 1.0, "global_step": 1404, "grad_norm": 1.803922890212383, "learning_rate": 1.2043430541904869e-06, "loss": 0.4167, "step": 1404 }, { "ETA": 3.25, "epoch": 0.45184113201479337, "fp16_scale": 1.0, "global_step": 1405, "grad_norm": 1.8310438597730947, "learning_rate": 1.2033229424576394e-06, "loss": 0.453, "step": 1405 }, { "ETA": 3.25, "epoch": 0.4521627271265477, "fp16_scale": 1.0, "global_step": 1406, "grad_norm": 1.9179874872619675, "learning_rate": 1.2023026099693292e-06, "loss": 0.3701, "step": 1406 }, { "ETA": 3.25, "epoch": 0.452484322238302, "fp16_scale": 1.0, "global_step": 1407, "grad_norm": 1.8600064163645063, "learning_rate": 1.20128205783337e-06, "loss": 0.383, "step": 1407 }, { "ETA": 3.25, "epoch": 0.45280591735005626, "fp16_scale": 1.0, "global_step": 1408, "grad_norm": 1.9029987264933161, "learning_rate": 1.2002612871578141e-06, "loss": 0.4902, "step": 1408 }, { "ETA": 3.24, "epoch": 0.4531275124618106, "fp16_scale": 1.0, "global_step": 1409, "grad_norm": 2.0699543502953985, "learning_rate": 1.1992402990509514e-06, "loss": 0.3728, "step": 1409 }, { "ETA": 3.24, "epoch": 0.4534491075735649, "fp16_scale": 1.0, "global_step": 1410, "grad_norm": 2.1131434946387633, "learning_rate": 1.1982190946213076e-06, "loss": 0.4799, "step": 1410 }, { "ETA": 3.24, "epoch": 0.45377070268531916, "fp16_scale": 1.0, "global_step": 1411, "grad_norm": 1.8855715674615943, "learning_rate": 1.197197674977643e-06, "loss": 0.3921, "step": 1411 }, { "ETA": 3.24, "epoch": 0.4540922977970735, "fp16_scale": 1.0, "global_step": 1412, "grad_norm": 1.8736848384438523, "learning_rate": 1.1961760412289516e-06, "loss": 0.4081, "step": 1412 }, { "ETA": 3.23, "epoch": 0.4544138929088278, "fp16_scale": 1.0, "global_step": 1413, "grad_norm": 2.0319908352667557, "learning_rate": 1.1951541944844606e-06, "loss": 0.418, "step": 1413 }, { "ETA": 3.23, "epoch": 0.4547354880205821, "fp16_scale": 1.0, "global_step": 1414, "grad_norm": 2.1642966866671673, "learning_rate": 1.1941321358536277e-06, "loss": 0.3824, "step": 1414 }, { "ETA": 3.23, "epoch": 0.4550570831323364, "fp16_scale": 1.0, "global_step": 1415, "grad_norm": 1.9719607908931798, "learning_rate": 1.1931098664461406e-06, "loss": 0.3655, "step": 1415 }, { "ETA": 3.22, "epoch": 0.4553786782440907, "fp16_scale": 1.0, "global_step": 1416, "grad_norm": 2.044659469680282, "learning_rate": 1.1920873873719166e-06, "loss": 0.3818, "step": 1416 }, { "ETA": 3.22, "epoch": 0.455700273355845, "fp16_scale": 1.0, "global_step": 1417, "grad_norm": 2.0818545976450205, "learning_rate": 1.1910646997411001e-06, "loss": 0.4219, "step": 1417 }, { "ETA": 3.22, "epoch": 0.4560218684675993, "fp16_scale": 1.0, "global_step": 1418, "grad_norm": 2.183461535181188, "learning_rate": 1.1900418046640614e-06, "loss": 0.3808, "step": 1418 }, { "ETA": 3.22, "epoch": 0.45634346357935357, "fp16_scale": 1.0, "global_step": 1419, "grad_norm": 2.1044355098668053, "learning_rate": 1.1890187032513976e-06, "loss": 0.4753, "step": 1419 }, { "ETA": 3.22, "epoch": 0.4566650586911079, "fp16_scale": 1.0, "global_step": 1420, "grad_norm": 1.9120578141345488, "learning_rate": 1.187995396613928e-06, "loss": 0.4635, "step": 1420 }, { "ETA": 3.22, "epoch": 0.4569866538028622, "fp16_scale": 1.0, "global_step": 1421, "grad_norm": 1.9900019777715063, "learning_rate": 1.1869718858626963e-06, "loss": 0.4125, "step": 1421 }, { "ETA": 3.21, "epoch": 0.4573082489146165, "fp16_scale": 1.0, "global_step": 1422, "grad_norm": 1.9526876565066824, "learning_rate": 1.1859481721089668e-06, "loss": 0.4535, "step": 1422 }, { "ETA": 3.21, "epoch": 0.4576298440263708, "fp16_scale": 1.0, "global_step": 1423, "grad_norm": 2.0529819147468453, "learning_rate": 1.1849242564642244e-06, "loss": 0.3939, "step": 1423 }, { "ETA": 3.21, "epoch": 0.4579514391381251, "fp16_scale": 1.0, "global_step": 1424, "grad_norm": 2.210405714780168, "learning_rate": 1.1839001400401736e-06, "loss": 0.392, "step": 1424 }, { "ETA": 3.21, "epoch": 0.4582730342498794, "fp16_scale": 1.0, "global_step": 1425, "grad_norm": 1.990997733736417, "learning_rate": 1.1828758239487362e-06, "loss": 0.4619, "step": 1425 }, { "ETA": 3.2, "epoch": 0.4585946293616337, "fp16_scale": 1.0, "global_step": 1426, "grad_norm": 1.7999255114643025, "learning_rate": 1.1818513093020513e-06, "loss": 0.4807, "step": 1426 }, { "ETA": 3.2, "epoch": 0.458916224473388, "fp16_scale": 1.0, "global_step": 1427, "grad_norm": 1.9179751151421047, "learning_rate": 1.1808265972124738e-06, "loss": 0.4581, "step": 1427 }, { "ETA": 3.2, "epoch": 0.4592378195851423, "fp16_scale": 1.0, "global_step": 1428, "grad_norm": 2.0878934873157715, "learning_rate": 1.1798016887925726e-06, "loss": 0.4967, "step": 1428 }, { "ETA": 3.2, "epoch": 0.4595594146968966, "fp16_scale": 1.0, "global_step": 1429, "grad_norm": 1.848865583148038, "learning_rate": 1.1787765851551296e-06, "loss": 0.4408, "step": 1429 }, { "ETA": 3.2, "epoch": 0.45988100980865093, "fp16_scale": 1.0, "global_step": 1430, "grad_norm": 1.9915372078204412, "learning_rate": 1.1777512874131386e-06, "loss": 0.4804, "step": 1430 }, { "ETA": 3.2, "epoch": 0.4602026049204052, "fp16_scale": 1.0, "global_step": 1431, "grad_norm": 1.7080786351610435, "learning_rate": 1.1767257966798048e-06, "loss": 0.4257, "step": 1431 }, { "ETA": 3.19, "epoch": 0.4605242000321595, "fp16_scale": 1.0, "global_step": 1432, "grad_norm": 2.0959852281717364, "learning_rate": 1.1757001140685426e-06, "loss": 0.4428, "step": 1432 }, { "ETA": 3.19, "epoch": 0.46084579514391383, "fp16_scale": 1.0, "global_step": 1433, "grad_norm": 2.0800453233062006, "learning_rate": 1.1746742406929745e-06, "loss": 0.4635, "step": 1433 }, { "ETA": 3.19, "epoch": 0.4611673902556681, "fp16_scale": 1.0, "global_step": 1434, "grad_norm": 1.9991439086645446, "learning_rate": 1.1736481776669305e-06, "loss": 0.4814, "step": 1434 }, { "ETA": 3.19, "epoch": 0.4614889853674224, "fp16_scale": 1.0, "global_step": 1435, "grad_norm": 2.0002709328065795, "learning_rate": 1.1726219261044459e-06, "loss": 0.4328, "step": 1435 }, { "ETA": 3.19, "epoch": 0.4618105804791767, "fp16_scale": 1.0, "global_step": 1436, "grad_norm": 2.03905864061523, "learning_rate": 1.1715954871197615e-06, "loss": 0.4916, "step": 1436 }, { "ETA": 3.18, "epoch": 0.462132175590931, "fp16_scale": 1.0, "global_step": 1437, "grad_norm": 1.8577890856786565, "learning_rate": 1.1705688618273209e-06, "loss": 0.3885, "step": 1437 }, { "ETA": 3.18, "epoch": 0.46245377070268534, "fp16_scale": 1.0, "global_step": 1438, "grad_norm": 2.1861978948056895, "learning_rate": 1.1695420513417705e-06, "loss": 0.394, "step": 1438 }, { "ETA": 3.18, "epoch": 0.4627753658144396, "fp16_scale": 1.0, "global_step": 1439, "grad_norm": 1.7540393312754712, "learning_rate": 1.1685150567779575e-06, "loss": 0.4458, "step": 1439 }, { "ETA": 3.18, "epoch": 0.4630969609261939, "fp16_scale": 1.0, "global_step": 1440, "grad_norm": 1.946279500911723, "learning_rate": 1.1674878792509293e-06, "loss": 0.4189, "step": 1440 }, { "ETA": 3.18, "epoch": 0.46341855603794824, "fp16_scale": 1.0, "global_step": 1441, "grad_norm": 1.978765866295056, "learning_rate": 1.1664605198759312e-06, "loss": 0.4118, "step": 1441 }, { "ETA": 3.18, "epoch": 0.4637401511497025, "fp16_scale": 1.0, "global_step": 1442, "grad_norm": 1.9550889217653187, "learning_rate": 1.1654329797684065e-06, "loss": 0.3716, "step": 1442 }, { "ETA": 3.17, "epoch": 0.46406174626145685, "fp16_scale": 1.0, "global_step": 1443, "grad_norm": 2.150992481802672, "learning_rate": 1.1644052600439947e-06, "loss": 0.3508, "step": 1443 }, { "ETA": 3.17, "epoch": 0.46438334137321113, "fp16_scale": 1.0, "global_step": 1444, "grad_norm": 1.673795590015164, "learning_rate": 1.1633773618185302e-06, "loss": 0.3795, "step": 1444 }, { "ETA": 3.17, "epoch": 0.4647049364849654, "fp16_scale": 1.0, "global_step": 1445, "grad_norm": 1.9830135159880347, "learning_rate": 1.1623492862080412e-06, "loss": 0.4145, "step": 1445 }, { "ETA": 3.17, "epoch": 0.46502653159671975, "fp16_scale": 1.0, "global_step": 1446, "grad_norm": 2.02563990801057, "learning_rate": 1.1613210343287492e-06, "loss": 0.4524, "step": 1446 }, { "ETA": 3.17, "epoch": 0.46534812670847403, "fp16_scale": 1.0, "global_step": 1447, "grad_norm": 2.0165226085304058, "learning_rate": 1.1602926072970654e-06, "loss": 0.4508, "step": 1447 }, { "ETA": 3.16, "epoch": 0.4656697218202283, "fp16_scale": 1.0, "global_step": 1448, "grad_norm": 1.8882974099870768, "learning_rate": 1.1592640062295927e-06, "loss": 0.4783, "step": 1448 }, { "ETA": 3.16, "epoch": 0.46599131693198265, "fp16_scale": 1.0, "global_step": 1449, "grad_norm": 2.0057867899838313, "learning_rate": 1.1582352322431225e-06, "loss": 0.3945, "step": 1449 }, { "ETA": 3.16, "epoch": 0.4663129120437369, "fp16_scale": 1.0, "global_step": 1450, "grad_norm": 1.8841958050366507, "learning_rate": 1.157206286454634e-06, "loss": 0.3801, "step": 1450 }, { "ETA": 3.16, "epoch": 0.46663450715549126, "fp16_scale": 1.0, "global_step": 1451, "grad_norm": 1.8272961060829263, "learning_rate": 1.1561771699812922e-06, "loss": 0.4317, "step": 1451 }, { "ETA": 3.16, "epoch": 0.46695610226724554, "fp16_scale": 1.0, "global_step": 1452, "grad_norm": 1.6842338631717175, "learning_rate": 1.1551478839404494e-06, "loss": 0.4365, "step": 1452 }, { "ETA": 3.16, "epoch": 0.4672776973789998, "fp16_scale": 1.0, "global_step": 1453, "grad_norm": 2.0903691759693372, "learning_rate": 1.1541184294496391e-06, "loss": 0.402, "step": 1453 }, { "ETA": 3.15, "epoch": 0.46759929249075416, "fp16_scale": 1.0, "global_step": 1454, "grad_norm": 1.7907585193865998, "learning_rate": 1.15308880762658e-06, "loss": 0.4354, "step": 1454 }, { "ETA": 3.15, "epoch": 0.46792088760250844, "fp16_scale": 1.0, "global_step": 1455, "grad_norm": 2.072868760983511, "learning_rate": 1.152059019589172e-06, "loss": 0.41, "step": 1455 }, { "ETA": 3.15, "epoch": 0.4682424827142627, "fp16_scale": 1.0, "global_step": 1456, "grad_norm": 1.9350679455167799, "learning_rate": 1.1510290664554941e-06, "loss": 0.379, "step": 1456 }, { "ETA": 3.15, "epoch": 0.46856407782601706, "fp16_scale": 1.0, "global_step": 1457, "grad_norm": 2.0604895433760464, "learning_rate": 1.1499989493438074e-06, "loss": 0.3475, "step": 1457 }, { "ETA": 3.14, "epoch": 0.46888567293777134, "fp16_scale": 1.0, "global_step": 1458, "grad_norm": 1.8593424610855758, "learning_rate": 1.1489686693725478e-06, "loss": 0.4129, "step": 1458 }, { "ETA": 3.14, "epoch": 0.46920726804952567, "fp16_scale": 1.0, "global_step": 1459, "grad_norm": 2.0070537049832047, "learning_rate": 1.1479382276603299e-06, "loss": 0.3448, "step": 1459 }, { "ETA": 3.14, "epoch": 0.46952886316127995, "fp16_scale": 1.0, "global_step": 1460, "grad_norm": 1.643190458066208, "learning_rate": 1.1469076253259438e-06, "loss": 0.3631, "step": 1460 }, { "ETA": 3.14, "epoch": 0.46985045827303423, "fp16_scale": 1.0, "global_step": 1461, "grad_norm": 2.070470565712216, "learning_rate": 1.1458768634883534e-06, "loss": 0.4025, "step": 1461 }, { "ETA": 3.13, "epoch": 0.47017205338478857, "fp16_scale": 1.0, "global_step": 1462, "grad_norm": 1.9505825330705497, "learning_rate": 1.1448459432666959e-06, "loss": 0.4438, "step": 1462 }, { "ETA": 3.13, "epoch": 0.47049364849654285, "fp16_scale": 1.0, "global_step": 1463, "grad_norm": 1.997402714028704, "learning_rate": 1.1438148657802814e-06, "loss": 0.4514, "step": 1463 }, { "ETA": 3.13, "epoch": 0.47081524360829713, "fp16_scale": 1.0, "global_step": 1464, "grad_norm": 2.0821132050682305, "learning_rate": 1.1427836321485895e-06, "loss": 0.3976, "step": 1464 }, { "ETA": 3.13, "epoch": 0.47113683872005147, "fp16_scale": 1.0, "global_step": 1465, "grad_norm": 1.9902860827448616, "learning_rate": 1.14175224349127e-06, "loss": 0.4509, "step": 1465 }, { "ETA": 3.13, "epoch": 0.47145843383180575, "fp16_scale": 1.0, "global_step": 1466, "grad_norm": 2.0287431890460694, "learning_rate": 1.1407207009281402e-06, "loss": 0.4753, "step": 1466 }, { "ETA": 3.12, "epoch": 0.4717800289435601, "fp16_scale": 1.0, "global_step": 1467, "grad_norm": 2.1874441731605354, "learning_rate": 1.1396890055791862e-06, "loss": 0.456, "step": 1467 }, { "ETA": 3.12, "epoch": 0.47210162405531436, "fp16_scale": 1.0, "global_step": 1468, "grad_norm": 2.019457884880683, "learning_rate": 1.1386571585645579e-06, "loss": 0.3394, "step": 1468 }, { "ETA": 3.12, "epoch": 0.47242321916706864, "fp16_scale": 1.0, "global_step": 1469, "grad_norm": 1.9878429225127454, "learning_rate": 1.137625161004572e-06, "loss": 0.419, "step": 1469 }, { "ETA": 3.12, "epoch": 0.472744814278823, "fp16_scale": 1.0, "global_step": 1470, "grad_norm": 1.8582254062968473, "learning_rate": 1.1365930140197066e-06, "loss": 0.4814, "step": 1470 }, { "ETA": 3.12, "epoch": 0.47306640939057726, "fp16_scale": 1.0, "global_step": 1471, "grad_norm": 2.2421415923963215, "learning_rate": 1.1355607187306036e-06, "loss": 0.3864, "step": 1471 }, { "ETA": 3.11, "epoch": 0.47338800450233154, "fp16_scale": 1.0, "global_step": 1472, "grad_norm": 2.0632495122510583, "learning_rate": 1.1345282762580649e-06, "loss": 0.419, "step": 1472 }, { "ETA": 3.11, "epoch": 0.4737095996140859, "fp16_scale": 1.0, "global_step": 1473, "grad_norm": 1.9216519384670905, "learning_rate": 1.1334956877230527e-06, "loss": 0.4142, "step": 1473 }, { "ETA": 3.11, "epoch": 0.47403119472584015, "fp16_scale": 1.0, "global_step": 1474, "grad_norm": 1.9825823580110198, "learning_rate": 1.132462954246688e-06, "loss": 0.4102, "step": 1474 }, { "ETA": 3.11, "epoch": 0.4743527898375945, "fp16_scale": 1.0, "global_step": 1475, "grad_norm": 1.9049563681116846, "learning_rate": 1.1314300769502485e-06, "loss": 0.4553, "step": 1475 }, { "ETA": 3.11, "epoch": 0.47467438494934877, "fp16_scale": 1.0, "global_step": 1476, "grad_norm": 1.9691137154721197, "learning_rate": 1.130397056955169e-06, "loss": 0.4579, "step": 1476 }, { "ETA": 3.1, "epoch": 0.47499598006110305, "fp16_scale": 1.0, "global_step": 1477, "grad_norm": 2.1773439122015863, "learning_rate": 1.1293638953830378e-06, "loss": 0.3821, "step": 1477 }, { "ETA": 3.1, "epoch": 0.4753175751728574, "fp16_scale": 1.0, "global_step": 1478, "grad_norm": 1.910864823603078, "learning_rate": 1.1283305933555984e-06, "loss": 0.4269, "step": 1478 }, { "ETA": 3.1, "epoch": 0.47563917028461167, "fp16_scale": 1.0, "global_step": 1479, "grad_norm": 2.045515841306016, "learning_rate": 1.1272971519947457e-06, "loss": 0.4844, "step": 1479 }, { "ETA": 3.1, "epoch": 0.47596076539636595, "fp16_scale": 1.0, "global_step": 1480, "grad_norm": 1.8911679526676894, "learning_rate": 1.126263572422527e-06, "loss": 0.4142, "step": 1480 }, { "ETA": 3.1, "epoch": 0.4762823605081203, "fp16_scale": 1.0, "global_step": 1481, "grad_norm": 1.9845520254385136, "learning_rate": 1.1252298557611383e-06, "loss": 0.456, "step": 1481 }, { "ETA": 3.1, "epoch": 0.47660395561987456, "fp16_scale": 1.0, "global_step": 1482, "grad_norm": 1.908810677619066, "learning_rate": 1.124196003132926e-06, "loss": 0.508, "step": 1482 }, { "ETA": 3.09, "epoch": 0.4769255507316289, "fp16_scale": 1.0, "global_step": 1483, "grad_norm": 1.771266096955992, "learning_rate": 1.1231620156603823e-06, "loss": 0.4114, "step": 1483 }, { "ETA": 3.09, "epoch": 0.4772471458433832, "fp16_scale": 1.0, "global_step": 1484, "grad_norm": 2.2235534069634197, "learning_rate": 1.1221278944661472e-06, "loss": 0.4922, "step": 1484 }, { "ETA": 3.09, "epoch": 0.47756874095513746, "fp16_scale": 1.0, "global_step": 1485, "grad_norm": 1.868268949487213, "learning_rate": 1.1210936406730058e-06, "loss": 0.321, "step": 1485 }, { "ETA": 3.09, "epoch": 0.4778903360668918, "fp16_scale": 1.0, "global_step": 1486, "grad_norm": 2.1301065625680717, "learning_rate": 1.1200592554038865e-06, "loss": 0.425, "step": 1486 }, { "ETA": 3.09, "epoch": 0.4782119311786461, "fp16_scale": 1.0, "global_step": 1487, "grad_norm": 2.1376344932652596, "learning_rate": 1.1190247397818606e-06, "loss": 0.4601, "step": 1487 }, { "ETA": 3.08, "epoch": 0.4785335262904004, "fp16_scale": 1.0, "global_step": 1488, "grad_norm": 1.8644548869345658, "learning_rate": 1.1179900949301417e-06, "loss": 0.4582, "step": 1488 }, { "ETA": 3.08, "epoch": 0.4788551214021547, "fp16_scale": 1.0, "global_step": 1489, "grad_norm": 2.064205355643897, "learning_rate": 1.1169553219720826e-06, "loss": 0.3806, "step": 1489 }, { "ETA": 3.08, "epoch": 0.479176716513909, "fp16_scale": 1.0, "global_step": 1490, "grad_norm": 1.9404339758716016, "learning_rate": 1.1159204220311756e-06, "loss": 0.5099, "step": 1490 }, { "ETA": 3.08, "epoch": 0.4794983116256633, "fp16_scale": 1.0, "global_step": 1491, "grad_norm": 1.750338191909074, "learning_rate": 1.1148853962310516e-06, "loss": 0.396, "step": 1491 }, { "ETA": 3.08, "epoch": 0.4798199067374176, "fp16_scale": 1.0, "global_step": 1492, "grad_norm": 1.8108943152358827, "learning_rate": 1.113850245695477e-06, "loss": 0.4023, "step": 1492 }, { "ETA": 3.07, "epoch": 0.48014150184917187, "fp16_scale": 1.0, "global_step": 1493, "grad_norm": 2.205008750952935, "learning_rate": 1.1128149715483547e-06, "loss": 0.3603, "step": 1493 }, { "ETA": 3.07, "epoch": 0.4804630969609262, "fp16_scale": 1.0, "global_step": 1494, "grad_norm": 1.9074915275539868, "learning_rate": 1.1117795749137206e-06, "loss": 0.4314, "step": 1494 }, { "ETA": 3.07, "epoch": 0.4807846920726805, "fp16_scale": 1.0, "global_step": 1495, "grad_norm": 2.0833759204535114, "learning_rate": 1.1107440569157444e-06, "loss": 0.3921, "step": 1495 }, { "ETA": 3.07, "epoch": 0.4811062871844348, "fp16_scale": 1.0, "global_step": 1496, "grad_norm": 2.158830571179874, "learning_rate": 1.109708418678728e-06, "loss": 0.3662, "step": 1496 }, { "ETA": 3.07, "epoch": 0.4814278822961891, "fp16_scale": 1.0, "global_step": 1497, "grad_norm": 1.9519051946568877, "learning_rate": 1.1086726613271028e-06, "loss": 0.4709, "step": 1497 }, { "ETA": 3.07, "epoch": 0.4817494774079434, "fp16_scale": 1.0, "global_step": 1498, "grad_norm": 1.8926894207331328, "learning_rate": 1.1076367859854302e-06, "loss": 0.4197, "step": 1498 }, { "ETA": 3.06, "epoch": 0.4820710725196977, "fp16_scale": 1.0, "global_step": 1499, "grad_norm": 1.817159057438267, "learning_rate": 1.1066007937783995e-06, "loss": 0.397, "step": 1499 }, { "ETA": 3.06, "epoch": 0.482392667631452, "fp16_scale": 1.0, "global_step": 1500, "grad_norm": 1.8333475714240695, "learning_rate": 1.1055646858308264e-06, "loss": 0.3492, "step": 1500 }, { "ETA": 3.06, "epoch": 0.4827142627432063, "fp16_scale": 1.0, "global_step": 1501, "grad_norm": 1.9852329828822428, "learning_rate": 1.1045284632676535e-06, "loss": 0.4314, "step": 1501 }, { "ETA": 3.06, "epoch": 0.4830358578549606, "fp16_scale": 1.0, "global_step": 1502, "grad_norm": 2.0970325834651855, "learning_rate": 1.1034921272139466e-06, "loss": 0.4529, "step": 1502 }, { "ETA": 3.06, "epoch": 0.4833574529667149, "fp16_scale": 1.0, "global_step": 1503, "grad_norm": 1.6668443903297765, "learning_rate": 1.1024556787948955e-06, "loss": 0.3715, "step": 1503 }, { "ETA": 3.05, "epoch": 0.48367904807846923, "fp16_scale": 1.0, "global_step": 1504, "grad_norm": 1.7112833732495047, "learning_rate": 1.1014191191358117e-06, "loss": 0.438, "step": 1504 }, { "ETA": 3.05, "epoch": 0.4840006431902235, "fp16_scale": 1.0, "global_step": 1505, "grad_norm": 2.1462971318756345, "learning_rate": 1.1003824493621274e-06, "loss": 0.4878, "step": 1505 }, { "ETA": 3.05, "epoch": 0.4843222383019778, "fp16_scale": 1.0, "global_step": 1506, "grad_norm": 2.0376448441760284, "learning_rate": 1.0993456705993945e-06, "loss": 0.4611, "step": 1506 }, { "ETA": 3.05, "epoch": 0.48464383341373213, "fp16_scale": 1.0, "global_step": 1507, "grad_norm": 2.0584199331524005, "learning_rate": 1.0983087839732831e-06, "loss": 0.4487, "step": 1507 }, { "ETA": 3.05, "epoch": 0.4849654285254864, "fp16_scale": 1.0, "global_step": 1508, "grad_norm": 1.9937428681748068, "learning_rate": 1.0972717906095808e-06, "loss": 0.412, "step": 1508 }, { "ETA": 3.05, "epoch": 0.4852870236372407, "fp16_scale": 1.0, "global_step": 1509, "grad_norm": 1.9568850624984397, "learning_rate": 1.0962346916341902e-06, "loss": 0.4019, "step": 1509 }, { "ETA": 3.04, "epoch": 0.485608618748995, "fp16_scale": 1.0, "global_step": 1510, "grad_norm": 1.86885155304055, "learning_rate": 1.0951974881731298e-06, "loss": 0.367, "step": 1510 }, { "ETA": 3.04, "epoch": 0.4859302138607493, "fp16_scale": 1.0, "global_step": 1511, "grad_norm": 1.995223230031954, "learning_rate": 1.0941601813525308e-06, "loss": 0.3998, "step": 1511 }, { "ETA": 3.04, "epoch": 0.48625180897250364, "fp16_scale": 1.0, "global_step": 1512, "grad_norm": 1.9989087916661044, "learning_rate": 1.0931227722986363e-06, "loss": 0.3847, "step": 1512 }, { "ETA": 3.04, "epoch": 0.4865734040842579, "fp16_scale": 1.0, "global_step": 1513, "grad_norm": 2.6041634816230093, "learning_rate": 1.092085262137801e-06, "loss": 0.4038, "step": 1513 }, { "ETA": 3.04, "epoch": 0.4868949991960122, "fp16_scale": 1.0, "global_step": 1514, "grad_norm": 2.0229525051667463, "learning_rate": 1.0910476519964895e-06, "loss": 0.4329, "step": 1514 }, { "ETA": 3.03, "epoch": 0.48721659430776654, "fp16_scale": 1.0, "global_step": 1515, "grad_norm": 1.7548148877659235, "learning_rate": 1.0900099430012743e-06, "loss": 0.3799, "step": 1515 }, { "ETA": 3.03, "epoch": 0.4875381894195208, "fp16_scale": 1.0, "global_step": 1516, "grad_norm": 1.9718031174852972, "learning_rate": 1.088972136278836e-06, "loss": 0.4295, "step": 1516 }, { "ETA": 3.03, "epoch": 0.4878597845312751, "fp16_scale": 1.0, "global_step": 1517, "grad_norm": 1.8079149325138577, "learning_rate": 1.0879342329559602e-06, "loss": 0.3546, "step": 1517 }, { "ETA": 3.03, "epoch": 0.48818137964302943, "fp16_scale": 1.0, "global_step": 1518, "grad_norm": 1.827517518365253, "learning_rate": 1.0868962341595387e-06, "loss": 0.3784, "step": 1518 }, { "ETA": 3.03, "epoch": 0.4885029747547837, "fp16_scale": 1.0, "global_step": 1519, "grad_norm": 1.9489494910458018, "learning_rate": 1.085858141016566e-06, "loss": 0.4402, "step": 1519 }, { "ETA": 3.03, "epoch": 0.48882456986653805, "fp16_scale": 1.0, "global_step": 1520, "grad_norm": 1.9675619743928485, "learning_rate": 1.0848199546541391e-06, "loss": 0.4943, "step": 1520 }, { "ETA": 3.02, "epoch": 0.48914616497829233, "fp16_scale": 1.0, "global_step": 1521, "grad_norm": 1.803594362549338, "learning_rate": 1.0837816761994575e-06, "loss": 0.4484, "step": 1521 }, { "ETA": 3.02, "epoch": 0.4894677600900466, "fp16_scale": 1.0, "global_step": 1522, "grad_norm": 1.9153258150997852, "learning_rate": 1.082743306779819e-06, "loss": 0.4015, "step": 1522 }, { "ETA": 3.02, "epoch": 0.48978935520180095, "fp16_scale": 1.0, "global_step": 1523, "grad_norm": 1.920659753656018, "learning_rate": 1.0817048475226202e-06, "loss": 0.4181, "step": 1523 }, { "ETA": 3.02, "epoch": 0.4901109503135552, "fp16_scale": 1.0, "global_step": 1524, "grad_norm": 1.8967381684793498, "learning_rate": 1.080666299555357e-06, "loss": 0.3888, "step": 1524 }, { "ETA": 3.02, "epoch": 0.49043254542530956, "fp16_scale": 1.0, "global_step": 1525, "grad_norm": 2.0195313420369074, "learning_rate": 1.0796276640056198e-06, "loss": 0.4935, "step": 1525 }, { "ETA": 3.02, "epoch": 0.49075414053706384, "fp16_scale": 1.0, "global_step": 1526, "grad_norm": 1.9326539942795364, "learning_rate": 1.078588942001095e-06, "loss": 0.4029, "step": 1526 }, { "ETA": 3.01, "epoch": 0.4910757356488181, "fp16_scale": 1.0, "global_step": 1527, "grad_norm": 1.7900283969233834, "learning_rate": 1.0775501346695628e-06, "loss": 0.4482, "step": 1527 }, { "ETA": 3.01, "epoch": 0.49139733076057246, "fp16_scale": 1.0, "global_step": 1528, "grad_norm": 1.840014114429064, "learning_rate": 1.0765112431388954e-06, "loss": 0.343, "step": 1528 }, { "ETA": 3.01, "epoch": 0.49171892587232674, "fp16_scale": 1.0, "global_step": 1529, "grad_norm": 1.9446197672831362, "learning_rate": 1.0754722685370571e-06, "loss": 0.4071, "step": 1529 }, { "ETA": 3.01, "epoch": 0.492040520984081, "fp16_scale": 1.0, "global_step": 1530, "grad_norm": 2.0102995975591575, "learning_rate": 1.0744332119921027e-06, "loss": 0.4499, "step": 1530 }, { "ETA": 3.01, "epoch": 0.49236211609583536, "fp16_scale": 1.0, "global_step": 1531, "grad_norm": 2.7472059083687204, "learning_rate": 1.0733940746321746e-06, "loss": 0.5255, "step": 1531 }, { "ETA": 3.0, "epoch": 0.49268371120758964, "fp16_scale": 1.0, "global_step": 1532, "grad_norm": 1.8943751253703869, "learning_rate": 1.0723548575855045e-06, "loss": 0.4162, "step": 1532 }, { "ETA": 3.0, "epoch": 0.49300530631934397, "fp16_scale": 1.0, "global_step": 1533, "grad_norm": 1.81240543059852, "learning_rate": 1.07131556198041e-06, "loss": 0.4207, "step": 1533 }, { "ETA": 3.0, "epoch": 0.49332690143109825, "fp16_scale": 1.0, "global_step": 1534, "grad_norm": 2.2554017559698076, "learning_rate": 1.070276188945293e-06, "loss": 0.3687, "step": 1534 }, { "ETA": 3.0, "epoch": 0.49364849654285253, "fp16_scale": 1.0, "global_step": 1535, "grad_norm": 1.8460626884530826, "learning_rate": 1.0692367396086413e-06, "loss": 0.432, "step": 1535 }, { "ETA": 3.0, "epoch": 0.49397009165460687, "fp16_scale": 1.0, "global_step": 1536, "grad_norm": 2.018607800130137, "learning_rate": 1.0681972150990245e-06, "loss": 0.4588, "step": 1536 }, { "ETA": 2.99, "epoch": 0.49429168676636115, "fp16_scale": 1.0, "global_step": 1537, "grad_norm": 2.003308828023504, "learning_rate": 1.0671576165450934e-06, "loss": 0.3649, "step": 1537 }, { "ETA": 2.99, "epoch": 0.49461328187811543, "fp16_scale": 1.0, "global_step": 1538, "grad_norm": 2.138620941424885, "learning_rate": 1.0661179450755804e-06, "loss": 0.4455, "step": 1538 }, { "ETA": 2.99, "epoch": 0.49493487698986977, "fp16_scale": 1.0, "global_step": 1539, "grad_norm": 2.008499212054112, "learning_rate": 1.065078201819296e-06, "loss": 0.4251, "step": 1539 }, { "ETA": 2.99, "epoch": 0.49525647210162405, "fp16_scale": 1.0, "global_step": 1540, "grad_norm": 2.082974182783928, "learning_rate": 1.0640383879051294e-06, "loss": 0.4122, "step": 1540 }, { "ETA": 2.99, "epoch": 0.4955780672133784, "fp16_scale": 1.0, "global_step": 1541, "grad_norm": 2.0246456266267274, "learning_rate": 1.0629985044620458e-06, "loss": 0.4179, "step": 1541 }, { "ETA": 2.98, "epoch": 0.49589966232513266, "fp16_scale": 1.0, "global_step": 1542, "grad_norm": 1.7869547994218695, "learning_rate": 1.0619585526190864e-06, "loss": 0.4072, "step": 1542 }, { "ETA": 2.98, "epoch": 0.49622125743688694, "fp16_scale": 1.0, "global_step": 1543, "grad_norm": 1.9121098432741022, "learning_rate": 1.0609185335053668e-06, "loss": 0.3966, "step": 1543 }, { "ETA": 2.98, "epoch": 0.4965428525486413, "fp16_scale": 1.0, "global_step": 1544, "grad_norm": 1.9761616742372048, "learning_rate": 1.059878448250075e-06, "loss": 0.4623, "step": 1544 }, { "ETA": 2.98, "epoch": 0.49686444766039556, "fp16_scale": 1.0, "global_step": 1545, "grad_norm": 1.9375219244560744, "learning_rate": 1.0588382979824712e-06, "loss": 0.4625, "step": 1545 }, { "ETA": 2.98, "epoch": 0.49718604277214984, "fp16_scale": 1.0, "global_step": 1546, "grad_norm": 1.8897575358362109, "learning_rate": 1.0577980838318865e-06, "loss": 0.4216, "step": 1546 }, { "ETA": 2.97, "epoch": 0.4975076378839042, "fp16_scale": 1.0, "global_step": 1547, "grad_norm": 1.9459334857630748, "learning_rate": 1.0567578069277207e-06, "loss": 0.4695, "step": 1547 }, { "ETA": 2.97, "epoch": 0.49782923299565845, "fp16_scale": 1.0, "global_step": 1548, "grad_norm": 2.1765654533864325, "learning_rate": 1.055717468399442e-06, "loss": 0.4074, "step": 1548 }, { "ETA": 2.97, "epoch": 0.4981508281074128, "fp16_scale": 1.0, "global_step": 1549, "grad_norm": 1.9986088515556024, "learning_rate": 1.054677069376586e-06, "loss": 0.4243, "step": 1549 }, { "ETA": 2.97, "epoch": 0.49847242321916707, "fp16_scale": 1.0, "global_step": 1550, "grad_norm": 2.0001655689294786, "learning_rate": 1.053636610988753e-06, "loss": 0.4046, "step": 1550 }, { "ETA": 2.97, "epoch": 0.49879401833092135, "fp16_scale": 1.0, "global_step": 1551, "grad_norm": 2.030441048637085, "learning_rate": 1.0525960943656088e-06, "loss": 0.3804, "step": 1551 }, { "ETA": 2.97, "epoch": 0.4991156134426757, "fp16_scale": 1.0, "global_step": 1552, "grad_norm": 1.9160403972161872, "learning_rate": 1.0515555206368813e-06, "loss": 0.4582, "step": 1552 }, { "ETA": 2.96, "epoch": 0.49943720855442997, "fp16_scale": 1.0, "global_step": 1553, "grad_norm": 1.9847974263885124, "learning_rate": 1.0505148909323615e-06, "loss": 0.4201, "step": 1553 }, { "ETA": 2.96, "epoch": 0.49975880366618425, "fp16_scale": 1.0, "global_step": 1554, "grad_norm": 1.8887130403081902, "learning_rate": 1.0494742063819008e-06, "loss": 0.3822, "step": 1554 }, { "ETA": 2.96, "epoch": 0.5000803987779385, "fp16_scale": 1.0, "global_step": 1555, "grad_norm": 1.8142692983493474, "learning_rate": 1.04843346811541e-06, "loss": 0.3874, "step": 1555 }, { "ETA": 2.96, "epoch": 0.5004019938896929, "fp16_scale": 1.0, "global_step": 1556, "grad_norm": 2.199494217948949, "learning_rate": 1.047392677262858e-06, "loss": 0.4794, "step": 1556 }, { "ETA": 2.96, "epoch": 0.5007235890014472, "fp16_scale": 1.0, "global_step": 1557, "grad_norm": 1.991244890119865, "learning_rate": 1.0463518349542712e-06, "loss": 0.4124, "step": 1557 }, { "ETA": 2.95, "epoch": 0.5010451841132014, "fp16_scale": 1.0, "global_step": 1558, "grad_norm": 1.9890509024092522, "learning_rate": 1.0453109423197317e-06, "loss": 0.3856, "step": 1558 }, { "ETA": 2.95, "epoch": 0.5013667792249558, "fp16_scale": 1.0, "global_step": 1559, "grad_norm": 2.1346695450030393, "learning_rate": 1.0442700004893764e-06, "loss": 0.4418, "step": 1559 }, { "ETA": 2.95, "epoch": 0.5016883743367101, "fp16_scale": 1.0, "global_step": 1560, "grad_norm": 1.8954676936171913, "learning_rate": 1.0432290105933955e-06, "loss": 0.4071, "step": 1560 }, { "ETA": 2.95, "epoch": 0.5020099694484644, "fp16_scale": 1.0, "global_step": 1561, "grad_norm": 1.8429515163232004, "learning_rate": 1.0421879737620311e-06, "loss": 0.3978, "step": 1561 }, { "ETA": 2.95, "epoch": 0.5023315645602187, "fp16_scale": 1.0, "global_step": 1562, "grad_norm": 1.8811312041106818, "learning_rate": 1.041146891125577e-06, "loss": 0.4292, "step": 1562 }, { "ETA": 2.95, "epoch": 0.502653159671973, "fp16_scale": 1.0, "global_step": 1563, "grad_norm": 2.149633377125283, "learning_rate": 1.0401057638143759e-06, "loss": 0.4273, "step": 1563 }, { "ETA": 2.94, "epoch": 0.5029747547837273, "fp16_scale": 1.0, "global_step": 1564, "grad_norm": 2.118221507675948, "learning_rate": 1.0390645929588195e-06, "loss": 0.4255, "step": 1564 }, { "ETA": 2.94, "epoch": 0.5032963498954816, "fp16_scale": 1.0, "global_step": 1565, "grad_norm": 2.108264126810423, "learning_rate": 1.0380233796893464e-06, "loss": 0.4181, "step": 1565 }, { "ETA": 2.94, "epoch": 0.5036179450072359, "fp16_scale": 1.0, "global_step": 1566, "grad_norm": 2.0316887594871957, "learning_rate": 1.0369821251364418e-06, "loss": 0.453, "step": 1566 }, { "ETA": 2.94, "epoch": 0.5039395401189902, "fp16_scale": 1.0, "global_step": 1567, "grad_norm": 2.3010183383648437, "learning_rate": 1.0359408304306358e-06, "loss": 0.445, "step": 1567 }, { "ETA": 2.94, "epoch": 0.5042611352307445, "fp16_scale": 1.0, "global_step": 1568, "grad_norm": 1.99798364861865, "learning_rate": 1.034899496702501e-06, "loss": 0.363, "step": 1568 }, { "ETA": 2.93, "epoch": 0.5045827303424988, "fp16_scale": 1.0, "global_step": 1569, "grad_norm": 1.93397754256817, "learning_rate": 1.0338581250826535e-06, "loss": 0.3803, "step": 1569 }, { "ETA": 2.93, "epoch": 0.5049043254542531, "fp16_scale": 1.0, "global_step": 1570, "grad_norm": 2.0151514240646855, "learning_rate": 1.0328167167017498e-06, "loss": 0.3669, "step": 1570 }, { "ETA": 2.93, "epoch": 0.5052259205660073, "fp16_scale": 1.0, "global_step": 1571, "grad_norm": 1.9817707611036037, "learning_rate": 1.031775272690487e-06, "loss": 0.5408, "step": 1571 }, { "ETA": 2.93, "epoch": 0.5055475156777617, "fp16_scale": 1.0, "global_step": 1572, "grad_norm": 1.8303409444087517, "learning_rate": 1.0307337941796003e-06, "loss": 0.3755, "step": 1572 }, { "ETA": 2.92, "epoch": 0.505869110789516, "fp16_scale": 1.0, "global_step": 1573, "grad_norm": 2.2494160842599698, "learning_rate": 1.029692282299863e-06, "loss": 0.3777, "step": 1573 }, { "ETA": 2.92, "epoch": 0.5061907059012704, "fp16_scale": 1.0, "global_step": 1574, "grad_norm": 1.988669734406121, "learning_rate": 1.0286507381820837e-06, "loss": 0.3368, "step": 1574 }, { "ETA": 2.92, "epoch": 0.5065123010130246, "fp16_scale": 1.0, "global_step": 1575, "grad_norm": 1.9620238986626437, "learning_rate": 1.0276091629571067e-06, "loss": 0.4226, "step": 1575 }, { "ETA": 2.92, "epoch": 0.5068338961247789, "fp16_scale": 1.0, "global_step": 1576, "grad_norm": 1.9486692517008308, "learning_rate": 1.0265675577558098e-06, "loss": 0.4111, "step": 1576 }, { "ETA": 2.92, "epoch": 0.5071554912365333, "fp16_scale": 1.0, "global_step": 1577, "grad_norm": 1.8619620960497394, "learning_rate": 1.0255259237091037e-06, "loss": 0.4338, "step": 1577 }, { "ETA": 2.92, "epoch": 0.5074770863482875, "fp16_scale": 1.0, "global_step": 1578, "grad_norm": 2.049798226514897, "learning_rate": 1.02448426194793e-06, "loss": 0.4825, "step": 1578 }, { "ETA": 2.91, "epoch": 0.5077986814600418, "fp16_scale": 1.0, "global_step": 1579, "grad_norm": 1.847056120246811, "learning_rate": 1.0234425736032607e-06, "loss": 0.4025, "step": 1579 }, { "ETA": 2.91, "epoch": 0.5081202765717961, "fp16_scale": 1.0, "global_step": 1580, "grad_norm": 1.8976665048301893, "learning_rate": 1.022400859806096e-06, "loss": 0.4233, "step": 1580 }, { "ETA": 2.91, "epoch": 0.5084418716835504, "fp16_scale": 1.0, "global_step": 1581, "grad_norm": 2.0290085809447476, "learning_rate": 1.0213591216874646e-06, "loss": 0.4502, "step": 1581 }, { "ETA": 2.91, "epoch": 0.5087634667953047, "fp16_scale": 1.0, "global_step": 1582, "grad_norm": 1.9375355634273952, "learning_rate": 1.0203173603784216e-06, "loss": 0.4502, "step": 1582 }, { "ETA": 2.91, "epoch": 0.509085061907059, "fp16_scale": 1.0, "global_step": 1583, "grad_norm": 2.14685888000696, "learning_rate": 1.0192755770100466e-06, "loss": 0.4561, "step": 1583 }, { "ETA": 2.9, "epoch": 0.5094066570188133, "fp16_scale": 1.0, "global_step": 1584, "grad_norm": 2.0901680388439803, "learning_rate": 1.0182337727134429e-06, "loss": 0.4819, "step": 1584 }, { "ETA": 2.9, "epoch": 0.5097282521305676, "fp16_scale": 1.0, "global_step": 1585, "grad_norm": 2.079279503569149, "learning_rate": 1.0171919486197384e-06, "loss": 0.4494, "step": 1585 }, { "ETA": 2.9, "epoch": 0.5100498472423219, "fp16_scale": 1.0, "global_step": 1586, "grad_norm": 1.9829272829978706, "learning_rate": 1.0161501058600803e-06, "loss": 0.5084, "step": 1586 }, { "ETA": 2.9, "epoch": 0.5103714423540762, "fp16_scale": 1.0, "global_step": 1587, "grad_norm": 2.262496505148507, "learning_rate": 1.0151082455656367e-06, "loss": 0.3741, "step": 1587 }, { "ETA": 2.9, "epoch": 0.5106930374658305, "fp16_scale": 1.0, "global_step": 1588, "grad_norm": 1.9231255778101124, "learning_rate": 1.0140663688675959e-06, "loss": 0.4202, "step": 1588 }, { "ETA": 2.89, "epoch": 0.5110146325775848, "fp16_scale": 1.0, "global_step": 1589, "grad_norm": 2.3576653684054167, "learning_rate": 1.0130244768971628e-06, "loss": 0.3904, "step": 1589 }, { "ETA": 2.89, "epoch": 0.5113362276893392, "fp16_scale": 1.0, "global_step": 1590, "grad_norm": 1.9822391498742844, "learning_rate": 1.0119825707855588e-06, "loss": 0.4375, "step": 1590 }, { "ETA": 2.89, "epoch": 0.5116578228010934, "fp16_scale": 1.0, "global_step": 1591, "grad_norm": 2.170209467428759, "learning_rate": 1.0109406516640212e-06, "loss": 0.4552, "step": 1591 }, { "ETA": 2.89, "epoch": 0.5119794179128477, "fp16_scale": 1.0, "global_step": 1592, "grad_norm": 1.8182302454950556, "learning_rate": 1.0098987206638014e-06, "loss": 0.4004, "step": 1592 }, { "ETA": 2.89, "epoch": 0.5123010130246021, "fp16_scale": 1.0, "global_step": 1593, "grad_norm": 2.2742069246283583, "learning_rate": 1.0088567789161637e-06, "loss": 0.4123, "step": 1593 }, { "ETA": 2.88, "epoch": 0.5126226081363563, "fp16_scale": 1.0, "global_step": 1594, "grad_norm": 1.8074854603327288, "learning_rate": 1.0078148275523839e-06, "loss": 0.4022, "step": 1594 }, { "ETA": 2.88, "epoch": 0.5129442032481106, "fp16_scale": 1.0, "global_step": 1595, "grad_norm": 1.8919083330036863, "learning_rate": 1.006772867703748e-06, "loss": 0.4717, "step": 1595 }, { "ETA": 2.88, "epoch": 0.513265798359865, "fp16_scale": 1.0, "global_step": 1596, "grad_norm": 1.930780179649879, "learning_rate": 1.0057309005015517e-06, "loss": 0.4667, "step": 1596 }, { "ETA": 2.88, "epoch": 0.5135873934716192, "fp16_scale": 1.0, "global_step": 1597, "grad_norm": 1.8823481611376727, "learning_rate": 1.0046889270770987e-06, "loss": 0.4272, "step": 1597 }, { "ETA": 2.88, "epoch": 0.5139089885833735, "fp16_scale": 1.0, "global_step": 1598, "grad_norm": 2.085460333805262, "learning_rate": 1.0036469485616985e-06, "loss": 0.4407, "step": 1598 }, { "ETA": 2.87, "epoch": 0.5142305836951279, "fp16_scale": 1.0, "global_step": 1599, "grad_norm": 2.050585262690548, "learning_rate": 1.0026049660866675e-06, "loss": 0.4578, "step": 1599 }, { "ETA": 2.87, "epoch": 0.5145521788068821, "fp16_scale": 1.0, "global_step": 1600, "grad_norm": 2.02747949847014, "learning_rate": 1.001562980783326e-06, "loss": 0.481, "step": 1600 }, { "ETA": 2.88, "epoch": 0.5148737739186364, "fp16_scale": 1.0, "global_step": 1601, "grad_norm": 2.1102242298586003, "learning_rate": 1.0005209937829962e-06, "loss": 0.4059, "step": 1601 }, { "ETA": 2.88, "epoch": 0.5151953690303908, "fp16_scale": 1.0, "global_step": 1602, "grad_norm": 1.9033299830802899, "learning_rate": 9.994790062170037e-07, "loss": 0.3734, "step": 1602 }, { "ETA": 2.88, "epoch": 0.515516964142145, "fp16_scale": 1.0, "global_step": 1603, "grad_norm": 2.223860335625996, "learning_rate": 9.984370192166742e-07, "loss": 0.4589, "step": 1603 }, { "ETA": 2.87, "epoch": 0.5158385592538993, "fp16_scale": 1.0, "global_step": 1604, "grad_norm": 1.91984635987963, "learning_rate": 9.973950339133322e-07, "loss": 0.3472, "step": 1604 }, { "ETA": 2.87, "epoch": 0.5161601543656537, "fp16_scale": 1.0, "global_step": 1605, "grad_norm": 2.075549344169445, "learning_rate": 9.963530514383016e-07, "loss": 0.4624, "step": 1605 }, { "ETA": 2.87, "epoch": 0.516481749477408, "fp16_scale": 1.0, "global_step": 1606, "grad_norm": 1.859380458646878, "learning_rate": 9.953110729229016e-07, "loss": 0.4281, "step": 1606 }, { "ETA": 2.87, "epoch": 0.5168033445891622, "fp16_scale": 1.0, "global_step": 1607, "grad_norm": 2.060696769744449, "learning_rate": 9.942690994984484e-07, "loss": 0.4407, "step": 1607 }, { "ETA": 2.87, "epoch": 0.5171249397009166, "fp16_scale": 1.0, "global_step": 1608, "grad_norm": 1.7215868508271845, "learning_rate": 9.932271322962521e-07, "loss": 0.4237, "step": 1608 }, { "ETA": 2.86, "epoch": 0.5174465348126709, "fp16_scale": 1.0, "global_step": 1609, "grad_norm": 1.8170851637621381, "learning_rate": 9.921851724476158e-07, "loss": 0.3627, "step": 1609 }, { "ETA": 2.86, "epoch": 0.5177681299244251, "fp16_scale": 1.0, "global_step": 1610, "grad_norm": 2.080001494718728, "learning_rate": 9.911432210838363e-07, "loss": 0.3805, "step": 1610 }, { "ETA": 2.86, "epoch": 0.5180897250361794, "fp16_scale": 1.0, "global_step": 1611, "grad_norm": 2.2985486698660007, "learning_rate": 9.901012793361985e-07, "loss": 0.3878, "step": 1611 }, { "ETA": 2.86, "epoch": 0.5184113201479338, "fp16_scale": 1.0, "global_step": 1612, "grad_norm": 2.5086366966372684, "learning_rate": 9.890593483359787e-07, "loss": 0.488, "step": 1612 }, { "ETA": 2.85, "epoch": 0.518732915259688, "fp16_scale": 1.0, "global_step": 1613, "grad_norm": 2.010293325396669, "learning_rate": 9.880174292144416e-07, "loss": 0.376, "step": 1613 }, { "ETA": 2.85, "epoch": 0.5190545103714423, "fp16_scale": 1.0, "global_step": 1614, "grad_norm": 1.8358532866623107, "learning_rate": 9.869755231028373e-07, "loss": 0.4112, "step": 1614 }, { "ETA": 2.85, "epoch": 0.5193761054831967, "fp16_scale": 1.0, "global_step": 1615, "grad_norm": 2.0705185022399926, "learning_rate": 9.85933631132404e-07, "loss": 0.4916, "step": 1615 }, { "ETA": 2.85, "epoch": 0.5196977005949509, "fp16_scale": 1.0, "global_step": 1616, "grad_norm": 2.023709420900433, "learning_rate": 9.848917544343634e-07, "loss": 0.5352, "step": 1616 }, { "ETA": 2.85, "epoch": 0.5200192957067052, "fp16_scale": 1.0, "global_step": 1617, "grad_norm": 1.92094948739541, "learning_rate": 9.838498941399196e-07, "loss": 0.4612, "step": 1617 }, { "ETA": 2.84, "epoch": 0.5203408908184596, "fp16_scale": 1.0, "global_step": 1618, "grad_norm": 2.1590515829343304, "learning_rate": 9.828080513802617e-07, "loss": 0.3923, "step": 1618 }, { "ETA": 2.84, "epoch": 0.5206624859302139, "fp16_scale": 1.0, "global_step": 1619, "grad_norm": 2.1403293792241818, "learning_rate": 9.817662272865568e-07, "loss": 0.4995, "step": 1619 }, { "ETA": 2.84, "epoch": 0.5209840810419681, "fp16_scale": 1.0, "global_step": 1620, "grad_norm": 1.9800035806978962, "learning_rate": 9.807244229899535e-07, "loss": 0.4261, "step": 1620 }, { "ETA": 2.84, "epoch": 0.5213056761537225, "fp16_scale": 1.0, "global_step": 1621, "grad_norm": 2.3143503505629144, "learning_rate": 9.796826396215783e-07, "loss": 0.4136, "step": 1621 }, { "ETA": 2.84, "epoch": 0.5216272712654768, "fp16_scale": 1.0, "global_step": 1622, "grad_norm": 2.0303042528874347, "learning_rate": 9.786408783125353e-07, "loss": 0.5375, "step": 1622 }, { "ETA": 2.83, "epoch": 0.521948866377231, "fp16_scale": 1.0, "global_step": 1623, "grad_norm": 1.7672886436411808, "learning_rate": 9.775991401939043e-07, "loss": 0.463, "step": 1623 }, { "ETA": 2.83, "epoch": 0.5222704614889854, "fp16_scale": 1.0, "global_step": 1624, "grad_norm": 1.9955680595303593, "learning_rate": 9.765574263967395e-07, "loss": 0.3811, "step": 1624 }, { "ETA": 2.83, "epoch": 0.5225920566007397, "fp16_scale": 1.0, "global_step": 1625, "grad_norm": 1.84911013814408, "learning_rate": 9.7551573805207e-07, "loss": 0.4093, "step": 1625 }, { "ETA": 2.83, "epoch": 0.5229136517124939, "fp16_scale": 1.0, "global_step": 1626, "grad_norm": 1.9727405644351568, "learning_rate": 9.744740762908962e-07, "loss": 0.4486, "step": 1626 }, { "ETA": 2.83, "epoch": 0.5232352468242483, "fp16_scale": 1.0, "global_step": 1627, "grad_norm": 1.8987616481187537, "learning_rate": 9.734324422441903e-07, "loss": 0.372, "step": 1627 }, { "ETA": 2.82, "epoch": 0.5235568419360026, "fp16_scale": 1.0, "global_step": 1628, "grad_norm": 2.104874268766279, "learning_rate": 9.723908370428934e-07, "loss": 0.4792, "step": 1628 }, { "ETA": 2.82, "epoch": 0.5238784370477568, "fp16_scale": 1.0, "global_step": 1629, "grad_norm": 2.037931033277432, "learning_rate": 9.713492618179164e-07, "loss": 0.4548, "step": 1629 }, { "ETA": 2.82, "epoch": 0.5242000321595112, "fp16_scale": 1.0, "global_step": 1630, "grad_norm": 1.9870517845156732, "learning_rate": 9.703077177001373e-07, "loss": 0.4648, "step": 1630 }, { "ETA": 2.82, "epoch": 0.5245216272712655, "fp16_scale": 1.0, "global_step": 1631, "grad_norm": 1.9429733091548653, "learning_rate": 9.692662058203996e-07, "loss": 0.4408, "step": 1631 }, { "ETA": 2.82, "epoch": 0.5248432223830197, "fp16_scale": 1.0, "global_step": 1632, "grad_norm": 1.9168587365422025, "learning_rate": 9.68224727309513e-07, "loss": 0.5181, "step": 1632 }, { "ETA": 2.81, "epoch": 0.5251648174947741, "fp16_scale": 1.0, "global_step": 1633, "grad_norm": 1.824832307942761, "learning_rate": 9.6718328329825e-07, "loss": 0.3516, "step": 1633 }, { "ETA": 2.81, "epoch": 0.5254864126065284, "fp16_scale": 1.0, "global_step": 1634, "grad_norm": 2.072616623060313, "learning_rate": 9.661418749173467e-07, "loss": 0.4084, "step": 1634 }, { "ETA": 2.81, "epoch": 0.5258080077182827, "fp16_scale": 1.0, "global_step": 1635, "grad_norm": 1.8799884615276101, "learning_rate": 9.651005032974993e-07, "loss": 0.4411, "step": 1635 }, { "ETA": 2.81, "epoch": 0.526129602830037, "fp16_scale": 1.0, "global_step": 1636, "grad_norm": 1.8586291089594524, "learning_rate": 9.640591695693643e-07, "loss": 0.4822, "step": 1636 }, { "ETA": 2.81, "epoch": 0.5264511979417913, "fp16_scale": 1.0, "global_step": 1637, "grad_norm": 1.9918129012674595, "learning_rate": 9.63017874863558e-07, "loss": 0.4686, "step": 1637 }, { "ETA": 2.8, "epoch": 0.5267727930535456, "fp16_scale": 1.0, "global_step": 1638, "grad_norm": 2.137331264992571, "learning_rate": 9.619766203106533e-07, "loss": 0.4136, "step": 1638 }, { "ETA": 2.8, "epoch": 0.5270943881652999, "fp16_scale": 1.0, "global_step": 1639, "grad_norm": 2.0635890205604763, "learning_rate": 9.609354070411806e-07, "loss": 0.3638, "step": 1639 }, { "ETA": 2.8, "epoch": 0.5274159832770542, "fp16_scale": 1.0, "global_step": 1640, "grad_norm": 1.88341098376735, "learning_rate": 9.598942361856243e-07, "loss": 0.4678, "step": 1640 }, { "ETA": 2.8, "epoch": 0.5277375783888085, "fp16_scale": 1.0, "global_step": 1641, "grad_norm": 2.123834976935097, "learning_rate": 9.588531088744232e-07, "loss": 0.3747, "step": 1641 }, { "ETA": 2.8, "epoch": 0.5280591735005628, "fp16_scale": 1.0, "global_step": 1642, "grad_norm": 2.039091565213063, "learning_rate": 9.57812026237969e-07, "loss": 0.4189, "step": 1642 }, { "ETA": 2.79, "epoch": 0.5283807686123171, "fp16_scale": 1.0, "global_step": 1643, "grad_norm": 1.9671824757357255, "learning_rate": 9.567709894066044e-07, "loss": 0.3653, "step": 1643 }, { "ETA": 2.79, "epoch": 0.5287023637240714, "fp16_scale": 1.0, "global_step": 1644, "grad_norm": 2.3620093742815667, "learning_rate": 9.557299995106237e-07, "loss": 0.3726, "step": 1644 }, { "ETA": 2.79, "epoch": 0.5290239588358256, "fp16_scale": 1.0, "global_step": 1645, "grad_norm": 1.9395855929320645, "learning_rate": 9.546890576802684e-07, "loss": 0.4576, "step": 1645 }, { "ETA": 2.79, "epoch": 0.52934555394758, "fp16_scale": 1.0, "global_step": 1646, "grad_norm": 1.8088910978805381, "learning_rate": 9.536481650457289e-07, "loss": 0.4414, "step": 1646 }, { "ETA": 2.79, "epoch": 0.5296671490593343, "fp16_scale": 1.0, "global_step": 1647, "grad_norm": 2.111357027253241, "learning_rate": 9.52607322737142e-07, "loss": 0.4036, "step": 1647 }, { "ETA": 2.78, "epoch": 0.5299887441710887, "fp16_scale": 1.0, "global_step": 1648, "grad_norm": 1.8565900369107433, "learning_rate": 9.515665318845899e-07, "loss": 0.4302, "step": 1648 }, { "ETA": 2.78, "epoch": 0.5303103392828429, "fp16_scale": 1.0, "global_step": 1649, "grad_norm": 2.1938904807691437, "learning_rate": 9.505257936180991e-07, "loss": 0.3928, "step": 1649 }, { "ETA": 2.78, "epoch": 0.5306319343945972, "fp16_scale": 1.0, "global_step": 1650, "grad_norm": 1.9079779935172827, "learning_rate": 9.494851090676383e-07, "loss": 0.3247, "step": 1650 }, { "ETA": 2.78, "epoch": 0.5309535295063516, "fp16_scale": 1.0, "global_step": 1651, "grad_norm": 1.890955381906359, "learning_rate": 9.484444793631186e-07, "loss": 0.4327, "step": 1651 }, { "ETA": 2.78, "epoch": 0.5312751246181058, "fp16_scale": 1.0, "global_step": 1652, "grad_norm": 2.1857793201911364, "learning_rate": 9.474039056343916e-07, "loss": 0.405, "step": 1652 }, { "ETA": 2.77, "epoch": 0.5315967197298601, "fp16_scale": 1.0, "global_step": 1653, "grad_norm": 2.1612198664743443, "learning_rate": 9.46363389011247e-07, "loss": 0.4921, "step": 1653 }, { "ETA": 2.77, "epoch": 0.5319183148416144, "fp16_scale": 1.0, "global_step": 1654, "grad_norm": 1.6790166735223522, "learning_rate": 9.453229306234142e-07, "loss": 0.4057, "step": 1654 }, { "ETA": 2.77, "epoch": 0.5322399099533687, "fp16_scale": 1.0, "global_step": 1655, "grad_norm": 1.81010700995378, "learning_rate": 9.442825316005579e-07, "loss": 0.3761, "step": 1655 }, { "ETA": 2.77, "epoch": 0.532561505065123, "fp16_scale": 1.0, "global_step": 1656, "grad_norm": 2.0145802715577465, "learning_rate": 9.432421930722792e-07, "loss": 0.4328, "step": 1656 }, { "ETA": 2.77, "epoch": 0.5328831001768773, "fp16_scale": 1.0, "global_step": 1657, "grad_norm": 1.931093282712056, "learning_rate": 9.422019161681137e-07, "loss": 0.5056, "step": 1657 }, { "ETA": 2.76, "epoch": 0.5332046952886316, "fp16_scale": 1.0, "global_step": 1658, "grad_norm": 1.9353050895637487, "learning_rate": 9.411617020175287e-07, "loss": 0.4093, "step": 1658 }, { "ETA": 2.76, "epoch": 0.5335262904003859, "fp16_scale": 1.0, "global_step": 1659, "grad_norm": 2.1583820544387824, "learning_rate": 9.40121551749925e-07, "loss": 0.4928, "step": 1659 }, { "ETA": 2.76, "epoch": 0.5338478855121402, "fp16_scale": 1.0, "global_step": 1660, "grad_norm": 1.792074199018544, "learning_rate": 9.390814664946331e-07, "loss": 0.4454, "step": 1660 }, { "ETA": 2.76, "epoch": 0.5341694806238945, "fp16_scale": 1.0, "global_step": 1661, "grad_norm": 1.9358430781906717, "learning_rate": 9.380414473809136e-07, "loss": 0.4129, "step": 1661 }, { "ETA": 2.76, "epoch": 0.5344910757356488, "fp16_scale": 1.0, "global_step": 1662, "grad_norm": 1.9529546238115019, "learning_rate": 9.370014955379539e-07, "loss": 0.4762, "step": 1662 }, { "ETA": 2.76, "epoch": 0.5348126708474031, "fp16_scale": 1.0, "global_step": 1663, "grad_norm": 2.2638924144295607, "learning_rate": 9.359616120948707e-07, "loss": 0.4251, "step": 1663 }, { "ETA": 2.75, "epoch": 0.5351342659591575, "fp16_scale": 1.0, "global_step": 1664, "grad_norm": 1.9813429016738413, "learning_rate": 9.34921798180704e-07, "loss": 0.3654, "step": 1664 }, { "ETA": 2.75, "epoch": 0.5354558610709117, "fp16_scale": 1.0, "global_step": 1665, "grad_norm": 1.960187197727769, "learning_rate": 9.338820549244196e-07, "loss": 0.4539, "step": 1665 }, { "ETA": 2.75, "epoch": 0.535777456182666, "fp16_scale": 1.0, "global_step": 1666, "grad_norm": 1.9770406298384058, "learning_rate": 9.328423834549069e-07, "loss": 0.4313, "step": 1666 }, { "ETA": 2.75, "epoch": 0.5360990512944204, "fp16_scale": 1.0, "global_step": 1667, "grad_norm": 1.9335789732605826, "learning_rate": 9.318027849009758e-07, "loss": 0.4187, "step": 1667 }, { "ETA": 2.75, "epoch": 0.5364206464061746, "fp16_scale": 1.0, "global_step": 1668, "grad_norm": 2.118099777727274, "learning_rate": 9.307632603913587e-07, "loss": 0.4702, "step": 1668 }, { "ETA": 2.75, "epoch": 0.5367422415179289, "fp16_scale": 1.0, "global_step": 1669, "grad_norm": 2.038387896608497, "learning_rate": 9.297238110547074e-07, "loss": 0.4608, "step": 1669 }, { "ETA": 2.74, "epoch": 0.5370638366296833, "fp16_scale": 1.0, "global_step": 1670, "grad_norm": 2.0556956671491324, "learning_rate": 9.286844380195902e-07, "loss": 0.4327, "step": 1670 }, { "ETA": 2.74, "epoch": 0.5373854317414375, "fp16_scale": 1.0, "global_step": 1671, "grad_norm": 1.9672969510997889, "learning_rate": 9.276451424144956e-07, "loss": 0.4308, "step": 1671 }, { "ETA": 2.74, "epoch": 0.5377070268531918, "fp16_scale": 1.0, "global_step": 1672, "grad_norm": 1.7491289758749573, "learning_rate": 9.266059253678254e-07, "loss": 0.3831, "step": 1672 }, { "ETA": 2.74, "epoch": 0.5380286219649462, "fp16_scale": 1.0, "global_step": 1673, "grad_norm": 1.9553234843934735, "learning_rate": 9.255667880078974e-07, "loss": 0.386, "step": 1673 }, { "ETA": 2.74, "epoch": 0.5383502170767004, "fp16_scale": 1.0, "global_step": 1674, "grad_norm": 1.952435448768448, "learning_rate": 9.24527731462943e-07, "loss": 0.4162, "step": 1674 }, { "ETA": 2.73, "epoch": 0.5386718121884547, "fp16_scale": 1.0, "global_step": 1675, "grad_norm": 2.2601203770527776, "learning_rate": 9.234887568611047e-07, "loss": 0.4065, "step": 1675 }, { "ETA": 2.73, "epoch": 0.5389934073002091, "fp16_scale": 1.0, "global_step": 1676, "grad_norm": 1.8281147986268291, "learning_rate": 9.224498653304375e-07, "loss": 0.3861, "step": 1676 }, { "ETA": 2.73, "epoch": 0.5393150024119633, "fp16_scale": 1.0, "global_step": 1677, "grad_norm": 2.0668405194874073, "learning_rate": 9.214110579989049e-07, "loss": 0.4329, "step": 1677 }, { "ETA": 2.73, "epoch": 0.5396365975237176, "fp16_scale": 1.0, "global_step": 1678, "grad_norm": 2.0307273211037775, "learning_rate": 9.203723359943802e-07, "loss": 0.5069, "step": 1678 }, { "ETA": 2.73, "epoch": 0.539958192635472, "fp16_scale": 1.0, "global_step": 1679, "grad_norm": 2.060692868732403, "learning_rate": 9.193337004446427e-07, "loss": 0.4973, "step": 1679 }, { "ETA": 2.72, "epoch": 0.5402797877472263, "fp16_scale": 1.0, "global_step": 1680, "grad_norm": 1.8842225015216565, "learning_rate": 9.182951524773797e-07, "loss": 0.4737, "step": 1680 }, { "ETA": 2.72, "epoch": 0.5406013828589805, "fp16_scale": 1.0, "global_step": 1681, "grad_norm": 2.049911978458765, "learning_rate": 9.172566932201813e-07, "loss": 0.3778, "step": 1681 }, { "ETA": 2.72, "epoch": 0.5409229779707349, "fp16_scale": 1.0, "global_step": 1682, "grad_norm": 2.04391244333585, "learning_rate": 9.162183238005424e-07, "loss": 0.4058, "step": 1682 }, { "ETA": 2.72, "epoch": 0.5412445730824892, "fp16_scale": 1.0, "global_step": 1683, "grad_norm": 2.1703937688139727, "learning_rate": 9.151800453458607e-07, "loss": 0.4195, "step": 1683 }, { "ETA": 2.72, "epoch": 0.5415661681942434, "fp16_scale": 1.0, "global_step": 1684, "grad_norm": 2.1288523361524985, "learning_rate": 9.141418589834339e-07, "loss": 0.4428, "step": 1684 }, { "ETA": 2.71, "epoch": 0.5418877633059977, "fp16_scale": 1.0, "global_step": 1685, "grad_norm": 2.1916580656636095, "learning_rate": 9.131037658404614e-07, "loss": 0.4564, "step": 1685 }, { "ETA": 2.71, "epoch": 0.5422093584177521, "fp16_scale": 1.0, "global_step": 1686, "grad_norm": 1.8491952040675952, "learning_rate": 9.120657670440399e-07, "loss": 0.3739, "step": 1686 }, { "ETA": 2.71, "epoch": 0.5425309535295063, "fp16_scale": 1.0, "global_step": 1687, "grad_norm": 1.8070925702546399, "learning_rate": 9.110278637211642e-07, "loss": 0.471, "step": 1687 }, { "ETA": 2.71, "epoch": 0.5428525486412606, "fp16_scale": 1.0, "global_step": 1688, "grad_norm": 2.057229379965033, "learning_rate": 9.099900569987259e-07, "loss": 0.4143, "step": 1688 }, { "ETA": 2.71, "epoch": 0.543174143753015, "fp16_scale": 1.0, "global_step": 1689, "grad_norm": 1.9522907381860064, "learning_rate": 9.089523480035105e-07, "loss": 0.5105, "step": 1689 }, { "ETA": 2.71, "epoch": 0.5434957388647692, "fp16_scale": 1.0, "global_step": 1690, "grad_norm": 1.9540070864190384, "learning_rate": 9.07914737862199e-07, "loss": 0.4394, "step": 1690 }, { "ETA": 2.7, "epoch": 0.5438173339765235, "fp16_scale": 1.0, "global_step": 1691, "grad_norm": 1.9054142712571198, "learning_rate": 9.068772277013636e-07, "loss": 0.4017, "step": 1691 }, { "ETA": 2.7, "epoch": 0.5441389290882779, "fp16_scale": 1.0, "global_step": 1692, "grad_norm": 1.8408541861671888, "learning_rate": 9.058398186474693e-07, "loss": 0.4782, "step": 1692 }, { "ETA": 2.7, "epoch": 0.5444605242000322, "fp16_scale": 1.0, "global_step": 1693, "grad_norm": 2.0129338146570173, "learning_rate": 9.048025118268703e-07, "loss": 0.4568, "step": 1693 }, { "ETA": 2.7, "epoch": 0.5447821193117864, "fp16_scale": 1.0, "global_step": 1694, "grad_norm": 1.9086720470669631, "learning_rate": 9.037653083658097e-07, "loss": 0.479, "step": 1694 }, { "ETA": 2.7, "epoch": 0.5451037144235408, "fp16_scale": 1.0, "global_step": 1695, "grad_norm": 1.8780951651815723, "learning_rate": 9.027282093904194e-07, "loss": 0.5396, "step": 1695 }, { "ETA": 2.7, "epoch": 0.5454253095352951, "fp16_scale": 1.0, "global_step": 1696, "grad_norm": 2.0141600550252567, "learning_rate": 9.016912160267167e-07, "loss": 0.4775, "step": 1696 }, { "ETA": 2.69, "epoch": 0.5457469046470493, "fp16_scale": 1.0, "global_step": 1697, "grad_norm": 1.9977542792940073, "learning_rate": 9.006543294006055e-07, "loss": 0.39, "step": 1697 }, { "ETA": 2.69, "epoch": 0.5460684997588037, "fp16_scale": 1.0, "global_step": 1698, "grad_norm": 2.0100574088084437, "learning_rate": 8.996175506378727e-07, "loss": 0.4399, "step": 1698 }, { "ETA": 2.69, "epoch": 0.546390094870558, "fp16_scale": 1.0, "global_step": 1699, "grad_norm": 1.8985816971991276, "learning_rate": 8.985808808641883e-07, "loss": 0.4302, "step": 1699 }, { "ETA": 2.69, "epoch": 0.5467116899823122, "fp16_scale": 1.0, "global_step": 1700, "grad_norm": 2.2538143685809047, "learning_rate": 8.975443212051044e-07, "loss": 0.4318, "step": 1700 }, { "ETA": 2.69, "epoch": 0.5470332850940666, "fp16_scale": 1.0, "global_step": 1701, "grad_norm": 1.8387660186964476, "learning_rate": 8.965078727860531e-07, "loss": 0.4254, "step": 1701 }, { "ETA": 2.68, "epoch": 0.5473548802058209, "fp16_scale": 1.0, "global_step": 1702, "grad_norm": 1.8393170690306526, "learning_rate": 8.954715367323466e-07, "loss": 0.4502, "step": 1702 }, { "ETA": 2.68, "epoch": 0.5476764753175751, "fp16_scale": 1.0, "global_step": 1703, "grad_norm": 1.9724573500731621, "learning_rate": 8.944353141691737e-07, "loss": 0.4194, "step": 1703 }, { "ETA": 2.68, "epoch": 0.5479980704293295, "fp16_scale": 1.0, "global_step": 1704, "grad_norm": 1.8424709457975637, "learning_rate": 8.933992062216007e-07, "loss": 0.4868, "step": 1704 }, { "ETA": 2.68, "epoch": 0.5483196655410838, "fp16_scale": 1.0, "global_step": 1705, "grad_norm": 1.7973152512620978, "learning_rate": 8.9236321401457e-07, "loss": 0.4057, "step": 1705 }, { "ETA": 2.68, "epoch": 0.548641260652838, "fp16_scale": 1.0, "global_step": 1706, "grad_norm": 1.9768906189493431, "learning_rate": 8.913273386728968e-07, "loss": 0.4456, "step": 1706 }, { "ETA": 2.67, "epoch": 0.5489628557645924, "fp16_scale": 1.0, "global_step": 1707, "grad_norm": 2.254650908488793, "learning_rate": 8.90291581321272e-07, "loss": 0.3038, "step": 1707 }, { "ETA": 2.67, "epoch": 0.5492844508763467, "fp16_scale": 1.0, "global_step": 1708, "grad_norm": 1.900765301423923, "learning_rate": 8.892559430842554e-07, "loss": 0.4334, "step": 1708 }, { "ETA": 2.67, "epoch": 0.549606045988101, "fp16_scale": 1.0, "global_step": 1709, "grad_norm": 1.957232263003515, "learning_rate": 8.882204250862795e-07, "loss": 0.481, "step": 1709 }, { "ETA": 2.67, "epoch": 0.5499276410998553, "fp16_scale": 1.0, "global_step": 1710, "grad_norm": 2.0269238406443773, "learning_rate": 8.871850284516457e-07, "loss": 0.4925, "step": 1710 }, { "ETA": 2.67, "epoch": 0.5502492362116096, "fp16_scale": 1.0, "global_step": 1711, "grad_norm": 2.1005374262104626, "learning_rate": 8.861497543045229e-07, "loss": 0.4553, "step": 1711 }, { "ETA": 2.67, "epoch": 0.5505708313233639, "fp16_scale": 1.0, "global_step": 1712, "grad_norm": 2.16953459934703, "learning_rate": 8.851146037689485e-07, "loss": 0.4664, "step": 1712 }, { "ETA": 2.66, "epoch": 0.5508924264351182, "fp16_scale": 1.0, "global_step": 1713, "grad_norm": 1.9012841499281627, "learning_rate": 8.840795779688242e-07, "loss": 0.4296, "step": 1713 }, { "ETA": 2.66, "epoch": 0.5512140215468725, "fp16_scale": 1.0, "global_step": 1714, "grad_norm": 1.7657029021922768, "learning_rate": 8.830446780279175e-07, "loss": 0.3813, "step": 1714 }, { "ETA": 2.66, "epoch": 0.5515356166586268, "fp16_scale": 1.0, "global_step": 1715, "grad_norm": 2.0263439287596676, "learning_rate": 8.820099050698586e-07, "loss": 0.4914, "step": 1715 }, { "ETA": 2.66, "epoch": 0.551857211770381, "fp16_scale": 1.0, "global_step": 1716, "grad_norm": 2.3080256513629274, "learning_rate": 8.809752602181393e-07, "loss": 0.437, "step": 1716 }, { "ETA": 2.66, "epoch": 0.5521788068821354, "fp16_scale": 1.0, "global_step": 1717, "grad_norm": 2.0337037323895424, "learning_rate": 8.799407445961137e-07, "loss": 0.3954, "step": 1717 }, { "ETA": 2.65, "epoch": 0.5525004019938897, "fp16_scale": 1.0, "global_step": 1718, "grad_norm": 1.976280328684517, "learning_rate": 8.78906359326994e-07, "loss": 0.4357, "step": 1718 }, { "ETA": 2.65, "epoch": 0.552821997105644, "fp16_scale": 1.0, "global_step": 1719, "grad_norm": 2.031342436164455, "learning_rate": 8.778721055338528e-07, "loss": 0.4661, "step": 1719 }, { "ETA": 2.65, "epoch": 0.5531435922173983, "fp16_scale": 1.0, "global_step": 1720, "grad_norm": 2.0845095138524368, "learning_rate": 8.768379843396177e-07, "loss": 0.4108, "step": 1720 }, { "ETA": 2.65, "epoch": 0.5534651873291526, "fp16_scale": 1.0, "global_step": 1721, "grad_norm": 2.0812859769989847, "learning_rate": 8.758039968670742e-07, "loss": 0.5438, "step": 1721 }, { "ETA": 2.65, "epoch": 0.5537867824409068, "fp16_scale": 1.0, "global_step": 1722, "grad_norm": 1.9524169274929275, "learning_rate": 8.747701442388616e-07, "loss": 0.5052, "step": 1722 }, { "ETA": 2.65, "epoch": 0.5541083775526612, "fp16_scale": 1.0, "global_step": 1723, "grad_norm": 1.8445450082133272, "learning_rate": 8.737364275774729e-07, "loss": 0.3534, "step": 1723 }, { "ETA": 2.64, "epoch": 0.5544299726644155, "fp16_scale": 1.0, "global_step": 1724, "grad_norm": 1.8526979368638332, "learning_rate": 8.727028480052543e-07, "loss": 0.438, "step": 1724 }, { "ETA": 2.64, "epoch": 0.5547515677761699, "fp16_scale": 1.0, "global_step": 1725, "grad_norm": 1.9320559143756728, "learning_rate": 8.716694066444017e-07, "loss": 0.4378, "step": 1725 }, { "ETA": 2.64, "epoch": 0.5550731628879241, "fp16_scale": 1.0, "global_step": 1726, "grad_norm": 2.1470404152795997, "learning_rate": 8.706361046169623e-07, "loss": 0.3622, "step": 1726 }, { "ETA": 2.64, "epoch": 0.5553947579996784, "fp16_scale": 1.0, "global_step": 1727, "grad_norm": 1.9500310475599905, "learning_rate": 8.696029430448315e-07, "loss": 0.3899, "step": 1727 }, { "ETA": 2.63, "epoch": 0.5557163531114327, "fp16_scale": 1.0, "global_step": 1728, "grad_norm": 1.98374866246575, "learning_rate": 8.685699230497514e-07, "loss": 0.3588, "step": 1728 }, { "ETA": 2.63, "epoch": 0.556037948223187, "fp16_scale": 1.0, "global_step": 1729, "grad_norm": 1.8915959141609786, "learning_rate": 8.675370457533121e-07, "loss": 0.4059, "step": 1729 }, { "ETA": 2.63, "epoch": 0.5563595433349413, "fp16_scale": 1.0, "global_step": 1730, "grad_norm": 1.8856876085991405, "learning_rate": 8.665043122769472e-07, "loss": 0.4323, "step": 1730 }, { "ETA": 2.63, "epoch": 0.5566811384466956, "fp16_scale": 1.0, "global_step": 1731, "grad_norm": 1.7684561210473508, "learning_rate": 8.654717237419351e-07, "loss": 0.4433, "step": 1731 }, { "ETA": 2.63, "epoch": 0.5570027335584499, "fp16_scale": 1.0, "global_step": 1732, "grad_norm": 1.7810245850770299, "learning_rate": 8.644392812693968e-07, "loss": 0.3711, "step": 1732 }, { "ETA": 2.63, "epoch": 0.5573243286702042, "fp16_scale": 1.0, "global_step": 1733, "grad_norm": 1.9821729103551067, "learning_rate": 8.634069859802935e-07, "loss": 0.4147, "step": 1733 }, { "ETA": 2.62, "epoch": 0.5576459237819585, "fp16_scale": 1.0, "global_step": 1734, "grad_norm": 1.9543986999749898, "learning_rate": 8.623748389954281e-07, "loss": 0.4229, "step": 1734 }, { "ETA": 2.62, "epoch": 0.5579675188937128, "fp16_scale": 1.0, "global_step": 1735, "grad_norm": 2.0466926107299686, "learning_rate": 8.613428414354417e-07, "loss": 0.4453, "step": 1735 }, { "ETA": 2.62, "epoch": 0.5582891140054671, "fp16_scale": 1.0, "global_step": 1736, "grad_norm": 2.0240704794095765, "learning_rate": 8.603109944208139e-07, "loss": 0.4059, "step": 1736 }, { "ETA": 2.62, "epoch": 0.5586107091172214, "fp16_scale": 1.0, "global_step": 1737, "grad_norm": 1.8044217907734543, "learning_rate": 8.592792990718595e-07, "loss": 0.4011, "step": 1737 }, { "ETA": 2.62, "epoch": 0.5589323042289758, "fp16_scale": 1.0, "global_step": 1738, "grad_norm": 1.765446521967734, "learning_rate": 8.582477565087302e-07, "loss": 0.3369, "step": 1738 }, { "ETA": 2.61, "epoch": 0.55925389934073, "fp16_scale": 1.0, "global_step": 1739, "grad_norm": 1.918372512515096, "learning_rate": 8.572163678514106e-07, "loss": 0.428, "step": 1739 }, { "ETA": 2.61, "epoch": 0.5595754944524843, "fp16_scale": 1.0, "global_step": 1740, "grad_norm": 2.12491006514322, "learning_rate": 8.561851342197184e-07, "loss": 0.3644, "step": 1740 }, { "ETA": 2.61, "epoch": 0.5598970895642387, "fp16_scale": 1.0, "global_step": 1741, "grad_norm": 2.0690566830004933, "learning_rate": 8.55154056733304e-07, "loss": 0.4755, "step": 1741 }, { "ETA": 2.61, "epoch": 0.5602186846759929, "fp16_scale": 1.0, "global_step": 1742, "grad_norm": 2.1818024567834136, "learning_rate": 8.541231365116467e-07, "loss": 0.3926, "step": 1742 }, { "ETA": 2.6, "epoch": 0.5605402797877472, "fp16_scale": 1.0, "global_step": 1743, "grad_norm": 1.8220517974524657, "learning_rate": 8.530923746740563e-07, "loss": 0.4561, "step": 1743 }, { "ETA": 2.6, "epoch": 0.5608618748995016, "fp16_scale": 1.0, "global_step": 1744, "grad_norm": 1.9285522640876034, "learning_rate": 8.520617723396702e-07, "loss": 0.4017, "step": 1744 }, { "ETA": 2.6, "epoch": 0.5611834700112558, "fp16_scale": 1.0, "global_step": 1745, "grad_norm": 2.154624360806632, "learning_rate": 8.510313306274522e-07, "loss": 0.4656, "step": 1745 }, { "ETA": 2.6, "epoch": 0.5615050651230101, "fp16_scale": 1.0, "global_step": 1746, "grad_norm": 1.952723697648608, "learning_rate": 8.500010506561928e-07, "loss": 0.4828, "step": 1746 }, { "ETA": 2.6, "epoch": 0.5618266602347645, "fp16_scale": 1.0, "global_step": 1747, "grad_norm": 2.341615494821478, "learning_rate": 8.489709335445054e-07, "loss": 0.4129, "step": 1747 }, { "ETA": 2.6, "epoch": 0.5621482553465187, "fp16_scale": 1.0, "global_step": 1748, "grad_norm": 1.9272337703544054, "learning_rate": 8.479409804108282e-07, "loss": 0.45, "step": 1748 }, { "ETA": 2.59, "epoch": 0.562469850458273, "fp16_scale": 1.0, "global_step": 1749, "grad_norm": 1.9766694566174827, "learning_rate": 8.469111923734198e-07, "loss": 0.4172, "step": 1749 }, { "ETA": 2.59, "epoch": 0.5627914455700274, "fp16_scale": 1.0, "global_step": 1750, "grad_norm": 1.9629971541372377, "learning_rate": 8.45881570550361e-07, "loss": 0.4689, "step": 1750 }, { "ETA": 2.59, "epoch": 0.5631130406817816, "fp16_scale": 1.0, "global_step": 1751, "grad_norm": 2.004159636242066, "learning_rate": 8.44852116059551e-07, "loss": 0.4394, "step": 1751 }, { "ETA": 2.59, "epoch": 0.5634346357935359, "fp16_scale": 1.0, "global_step": 1752, "grad_norm": 1.8743231940085416, "learning_rate": 8.438228300187075e-07, "loss": 0.3934, "step": 1752 }, { "ETA": 2.59, "epoch": 0.5637562309052903, "fp16_scale": 1.0, "global_step": 1753, "grad_norm": 1.9626815796530928, "learning_rate": 8.42793713545366e-07, "loss": 0.4799, "step": 1753 }, { "ETA": 2.59, "epoch": 0.5640778260170446, "fp16_scale": 1.0, "global_step": 1754, "grad_norm": 1.938175604853239, "learning_rate": 8.417647677568772e-07, "loss": 0.4137, "step": 1754 }, { "ETA": 2.58, "epoch": 0.5643994211287988, "fp16_scale": 1.0, "global_step": 1755, "grad_norm": 1.7744177383966495, "learning_rate": 8.407359937704073e-07, "loss": 0.4248, "step": 1755 }, { "ETA": 2.58, "epoch": 0.5647210162405532, "fp16_scale": 1.0, "global_step": 1756, "grad_norm": 1.9532503847097258, "learning_rate": 8.397073927029348e-07, "loss": 0.417, "step": 1756 }, { "ETA": 2.58, "epoch": 0.5650426113523075, "fp16_scale": 1.0, "global_step": 1757, "grad_norm": 2.22065066533937, "learning_rate": 8.38678965671251e-07, "loss": 0.3306, "step": 1757 }, { "ETA": 2.58, "epoch": 0.5653642064640617, "fp16_scale": 1.0, "global_step": 1758, "grad_norm": 1.953653843613687, "learning_rate": 8.376507137919588e-07, "loss": 0.4424, "step": 1758 }, { "ETA": 2.57, "epoch": 0.565685801575816, "fp16_scale": 1.0, "global_step": 1759, "grad_norm": 2.2588435224864343, "learning_rate": 8.366226381814696e-07, "loss": 0.4022, "step": 1759 }, { "ETA": 2.57, "epoch": 0.5660073966875704, "fp16_scale": 1.0, "global_step": 1760, "grad_norm": 1.8792259041029113, "learning_rate": 8.355947399560055e-07, "loss": 0.5325, "step": 1760 }, { "ETA": 2.57, "epoch": 0.5663289917993246, "fp16_scale": 1.0, "global_step": 1761, "grad_norm": 1.8657171909019314, "learning_rate": 8.345670202315938e-07, "loss": 0.4407, "step": 1761 }, { "ETA": 2.57, "epoch": 0.566650586911079, "fp16_scale": 1.0, "global_step": 1762, "grad_norm": 1.925518197428859, "learning_rate": 8.335394801240689e-07, "loss": 0.478, "step": 1762 }, { "ETA": 2.57, "epoch": 0.5669721820228333, "fp16_scale": 1.0, "global_step": 1763, "grad_norm": 2.1507431019069347, "learning_rate": 8.325121207490709e-07, "loss": 0.4391, "step": 1763 }, { "ETA": 2.56, "epoch": 0.5672937771345875, "fp16_scale": 1.0, "global_step": 1764, "grad_norm": 2.2289426766260294, "learning_rate": 8.314849432220423e-07, "loss": 0.5425, "step": 1764 }, { "ETA": 2.56, "epoch": 0.5676153722463418, "fp16_scale": 1.0, "global_step": 1765, "grad_norm": 1.9338689981686707, "learning_rate": 8.304579486582295e-07, "loss": 0.4208, "step": 1765 }, { "ETA": 2.56, "epoch": 0.5679369673580962, "fp16_scale": 1.0, "global_step": 1766, "grad_norm": 2.2381359699266907, "learning_rate": 8.294311381726789e-07, "loss": 0.5162, "step": 1766 }, { "ETA": 2.56, "epoch": 0.5682585624698504, "fp16_scale": 1.0, "global_step": 1767, "grad_norm": 1.927108842950315, "learning_rate": 8.284045128802385e-07, "loss": 0.4399, "step": 1767 }, { "ETA": 2.56, "epoch": 0.5685801575816047, "fp16_scale": 1.0, "global_step": 1768, "grad_norm": 1.8045672220973217, "learning_rate": 8.273780738955544e-07, "loss": 0.4396, "step": 1768 }, { "ETA": 2.56, "epoch": 0.5689017526933591, "fp16_scale": 1.0, "global_step": 1769, "grad_norm": 2.206743910505071, "learning_rate": 8.263518223330696e-07, "loss": 0.4406, "step": 1769 }, { "ETA": 2.55, "epoch": 0.5692233478051134, "fp16_scale": 1.0, "global_step": 1770, "grad_norm": 1.9521633490624264, "learning_rate": 8.253257593070255e-07, "loss": 0.4123, "step": 1770 }, { "ETA": 2.55, "epoch": 0.5695449429168676, "fp16_scale": 1.0, "global_step": 1771, "grad_norm": 1.7903253362340086, "learning_rate": 8.242998859314572e-07, "loss": 0.405, "step": 1771 }, { "ETA": 2.55, "epoch": 0.569866538028622, "fp16_scale": 1.0, "global_step": 1772, "grad_norm": 1.9837005731165283, "learning_rate": 8.232742033201953e-07, "loss": 0.373, "step": 1772 }, { "ETA": 2.55, "epoch": 0.5701881331403763, "fp16_scale": 1.0, "global_step": 1773, "grad_norm": 1.9642037637432663, "learning_rate": 8.222487125868617e-07, "loss": 0.3681, "step": 1773 }, { "ETA": 2.55, "epoch": 0.5705097282521305, "fp16_scale": 1.0, "global_step": 1774, "grad_norm": 1.9251607313366765, "learning_rate": 8.212234148448707e-07, "loss": 0.4476, "step": 1774 }, { "ETA": 2.54, "epoch": 0.5708313233638849, "fp16_scale": 1.0, "global_step": 1775, "grad_norm": 2.204764698489573, "learning_rate": 8.201983112074276e-07, "loss": 0.4402, "step": 1775 }, { "ETA": 2.54, "epoch": 0.5711529184756392, "fp16_scale": 1.0, "global_step": 1776, "grad_norm": 2.2923928454141587, "learning_rate": 8.19173402787526e-07, "loss": 0.4287, "step": 1776 }, { "ETA": 2.54, "epoch": 0.5714745135873934, "fp16_scale": 1.0, "global_step": 1777, "grad_norm": 1.8780913678933817, "learning_rate": 8.181486906979487e-07, "loss": 0.4122, "step": 1777 }, { "ETA": 2.54, "epoch": 0.5717961086991478, "fp16_scale": 1.0, "global_step": 1778, "grad_norm": 1.9990477720527768, "learning_rate": 8.171241760512638e-07, "loss": 0.4013, "step": 1778 }, { "ETA": 2.54, "epoch": 0.5721177038109021, "fp16_scale": 1.0, "global_step": 1779, "grad_norm": 2.1248581938501085, "learning_rate": 8.160998599598265e-07, "loss": 0.4272, "step": 1779 }, { "ETA": 2.53, "epoch": 0.5724392989226563, "fp16_scale": 1.0, "global_step": 1780, "grad_norm": 1.8442544901366762, "learning_rate": 8.150757435357758e-07, "loss": 0.4605, "step": 1780 }, { "ETA": 2.53, "epoch": 0.5727608940344107, "fp16_scale": 1.0, "global_step": 1781, "grad_norm": 1.7804841224343924, "learning_rate": 8.140518278910329e-07, "loss": 0.4858, "step": 1781 }, { "ETA": 2.53, "epoch": 0.573082489146165, "fp16_scale": 1.0, "global_step": 1782, "grad_norm": 1.9275225079051452, "learning_rate": 8.130281141373036e-07, "loss": 0.4632, "step": 1782 }, { "ETA": 2.53, "epoch": 0.5734040842579193, "fp16_scale": 1.0, "global_step": 1783, "grad_norm": 2.1636743932746736, "learning_rate": 8.120046033860717e-07, "loss": 0.4048, "step": 1783 }, { "ETA": 2.53, "epoch": 0.5737256793696736, "fp16_scale": 1.0, "global_step": 1784, "grad_norm": 1.9719324338725284, "learning_rate": 8.109812967486024e-07, "loss": 0.4411, "step": 1784 }, { "ETA": 2.52, "epoch": 0.5740472744814279, "fp16_scale": 1.0, "global_step": 1785, "grad_norm": 2.165944310471551, "learning_rate": 8.099581953359387e-07, "loss": 0.3551, "step": 1785 }, { "ETA": 2.52, "epoch": 0.5743688695931822, "fp16_scale": 1.0, "global_step": 1786, "grad_norm": 2.048182003544626, "learning_rate": 8.089353002589001e-07, "loss": 0.4871, "step": 1786 }, { "ETA": 2.52, "epoch": 0.5746904647049365, "fp16_scale": 1.0, "global_step": 1787, "grad_norm": 2.113670197158423, "learning_rate": 8.079126126280835e-07, "loss": 0.4502, "step": 1787 }, { "ETA": 2.52, "epoch": 0.5750120598166908, "fp16_scale": 1.0, "global_step": 1788, "grad_norm": 1.8462564358983096, "learning_rate": 8.068901335538592e-07, "loss": 0.433, "step": 1788 }, { "ETA": 2.52, "epoch": 0.5753336549284451, "fp16_scale": 1.0, "global_step": 1789, "grad_norm": 2.1693396197832975, "learning_rate": 8.058678641463724e-07, "loss": 0.481, "step": 1789 }, { "ETA": 2.51, "epoch": 0.5756552500401994, "fp16_scale": 1.0, "global_step": 1790, "grad_norm": 1.9947063436479482, "learning_rate": 8.048458055155395e-07, "loss": 0.3782, "step": 1790 }, { "ETA": 2.51, "epoch": 0.5759768451519537, "fp16_scale": 1.0, "global_step": 1791, "grad_norm": 1.8931074018887186, "learning_rate": 8.038239587710484e-07, "loss": 0.4266, "step": 1791 }, { "ETA": 2.51, "epoch": 0.576298440263708, "fp16_scale": 1.0, "global_step": 1792, "grad_norm": 1.9896410413935424, "learning_rate": 8.028023250223573e-07, "loss": 0.4381, "step": 1792 }, { "ETA": 2.51, "epoch": 0.5766200353754622, "fp16_scale": 1.0, "global_step": 1793, "grad_norm": 1.889691365312974, "learning_rate": 8.017809053786924e-07, "loss": 0.4842, "step": 1793 }, { "ETA": 2.51, "epoch": 0.5769416304872166, "fp16_scale": 1.0, "global_step": 1794, "grad_norm": 1.9760310787003852, "learning_rate": 8.007597009490486e-07, "loss": 0.4218, "step": 1794 }, { "ETA": 2.51, "epoch": 0.5772632255989709, "fp16_scale": 1.0, "global_step": 1795, "grad_norm": 1.7763686340074603, "learning_rate": 7.997387128421858e-07, "loss": 0.4331, "step": 1795 }, { "ETA": 2.5, "epoch": 0.5775848207107251, "fp16_scale": 1.0, "global_step": 1796, "grad_norm": 1.8898712096989858, "learning_rate": 7.987179421666302e-07, "loss": 0.3923, "step": 1796 }, { "ETA": 2.5, "epoch": 0.5779064158224795, "fp16_scale": 1.0, "global_step": 1797, "grad_norm": 2.399264277769306, "learning_rate": 7.976973900306709e-07, "loss": 0.3966, "step": 1797 }, { "ETA": 2.5, "epoch": 0.5782280109342338, "fp16_scale": 1.0, "global_step": 1798, "grad_norm": 1.8274115851856996, "learning_rate": 7.966770575423605e-07, "loss": 0.4037, "step": 1798 }, { "ETA": 2.5, "epoch": 0.5785496060459882, "fp16_scale": 1.0, "global_step": 1799, "grad_norm": 1.96548981746754, "learning_rate": 7.956569458095133e-07, "loss": 0.3995, "step": 1799 }, { "ETA": 2.49, "epoch": 0.5788712011577424, "fp16_scale": 1.0, "global_step": 1800, "grad_norm": 2.1163447856087463, "learning_rate": 7.946370559397023e-07, "loss": 0.4212, "step": 1800 }, { "ETA": 2.5, "epoch": 0.5791927962694967, "fp16_scale": 1.0, "global_step": 1801, "grad_norm": 1.9379575899574724, "learning_rate": 7.936173890402619e-07, "loss": 0.4296, "step": 1801 }, { "ETA": 2.5, "epoch": 0.579514391381251, "fp16_scale": 1.0, "global_step": 1802, "grad_norm": 1.9297845491686623, "learning_rate": 7.92597946218283e-07, "loss": 0.4376, "step": 1802 }, { "ETA": 2.5, "epoch": 0.5798359864930053, "fp16_scale": 1.0, "global_step": 1803, "grad_norm": 2.029934187436816, "learning_rate": 7.915787285806127e-07, "loss": 0.4706, "step": 1803 }, { "ETA": 2.49, "epoch": 0.5801575816047596, "fp16_scale": 1.0, "global_step": 1804, "grad_norm": 1.9195051447849678, "learning_rate": 7.905597372338557e-07, "loss": 0.3593, "step": 1804 }, { "ETA": 2.49, "epoch": 0.580479176716514, "fp16_scale": 1.0, "global_step": 1805, "grad_norm": 2.04487864218629, "learning_rate": 7.895409732843688e-07, "loss": 0.4215, "step": 1805 }, { "ETA": 2.49, "epoch": 0.5808007718282682, "fp16_scale": 1.0, "global_step": 1806, "grad_norm": 1.980442588823403, "learning_rate": 7.885224378382631e-07, "loss": 0.4153, "step": 1806 }, { "ETA": 2.49, "epoch": 0.5811223669400225, "fp16_scale": 1.0, "global_step": 1807, "grad_norm": 1.8145489663974113, "learning_rate": 7.875041320014017e-07, "loss": 0.4452, "step": 1807 }, { "ETA": 2.49, "epoch": 0.5814439620517768, "fp16_scale": 1.0, "global_step": 1808, "grad_norm": 1.8278254072744091, "learning_rate": 7.864860568793971e-07, "loss": 0.354, "step": 1808 }, { "ETA": 2.48, "epoch": 0.5817655571635311, "fp16_scale": 1.0, "global_step": 1809, "grad_norm": 2.0362472280698642, "learning_rate": 7.854682135776131e-07, "loss": 0.442, "step": 1809 }, { "ETA": 2.48, "epoch": 0.5820871522752854, "fp16_scale": 1.0, "global_step": 1810, "grad_norm": 1.8502710374670852, "learning_rate": 7.844506032011604e-07, "loss": 0.3702, "step": 1810 }, { "ETA": 2.48, "epoch": 0.5824087473870397, "fp16_scale": 1.0, "global_step": 1811, "grad_norm": 1.8714825606521435, "learning_rate": 7.834332268548978e-07, "loss": 0.3788, "step": 1811 }, { "ETA": 2.48, "epoch": 0.582730342498794, "fp16_scale": 1.0, "global_step": 1812, "grad_norm": 1.9831501683786872, "learning_rate": 7.824160856434291e-07, "loss": 0.4583, "step": 1812 }, { "ETA": 2.48, "epoch": 0.5830519376105483, "fp16_scale": 1.0, "global_step": 1813, "grad_norm": 2.0505702329085342, "learning_rate": 7.813991806711039e-07, "loss": 0.4133, "step": 1813 }, { "ETA": 2.47, "epoch": 0.5833735327223026, "fp16_scale": 1.0, "global_step": 1814, "grad_norm": 1.8947002380528628, "learning_rate": 7.803825130420141e-07, "loss": 0.3833, "step": 1814 }, { "ETA": 2.47, "epoch": 0.583695127834057, "fp16_scale": 1.0, "global_step": 1815, "grad_norm": 1.9725055107529188, "learning_rate": 7.793660838599942e-07, "loss": 0.3699, "step": 1815 }, { "ETA": 2.47, "epoch": 0.5840167229458112, "fp16_scale": 1.0, "global_step": 1816, "grad_norm": 1.8382586942742276, "learning_rate": 7.783498942286211e-07, "loss": 0.4018, "step": 1816 }, { "ETA": 2.47, "epoch": 0.5843383180575655, "fp16_scale": 1.0, "global_step": 1817, "grad_norm": 2.0241792152858986, "learning_rate": 7.773339452512096e-07, "loss": 0.4222, "step": 1817 }, { "ETA": 2.47, "epoch": 0.5846599131693199, "fp16_scale": 1.0, "global_step": 1818, "grad_norm": 2.0391250336105777, "learning_rate": 7.763182380308146e-07, "loss": 0.3534, "step": 1818 }, { "ETA": 2.46, "epoch": 0.5849815082810741, "fp16_scale": 1.0, "global_step": 1819, "grad_norm": 1.994254291959461, "learning_rate": 7.753027736702282e-07, "loss": 0.4023, "step": 1819 }, { "ETA": 2.46, "epoch": 0.5853031033928284, "fp16_scale": 1.0, "global_step": 1820, "grad_norm": 1.8984747643448134, "learning_rate": 7.742875532719785e-07, "loss": 0.4169, "step": 1820 }, { "ETA": 2.46, "epoch": 0.5856246985045828, "fp16_scale": 1.0, "global_step": 1821, "grad_norm": 2.032035605353064, "learning_rate": 7.732725779383294e-07, "loss": 0.4071, "step": 1821 }, { "ETA": 2.46, "epoch": 0.585946293616337, "fp16_scale": 1.0, "global_step": 1822, "grad_norm": 2.0041827741214555, "learning_rate": 7.722578487712775e-07, "loss": 0.5558, "step": 1822 }, { "ETA": 2.46, "epoch": 0.5862678887280913, "fp16_scale": 1.0, "global_step": 1823, "grad_norm": 2.1584081221774865, "learning_rate": 7.712433668725536e-07, "loss": 0.3674, "step": 1823 }, { "ETA": 2.45, "epoch": 0.5865894838398457, "fp16_scale": 1.0, "global_step": 1824, "grad_norm": 2.001619097438793, "learning_rate": 7.70229133343619e-07, "loss": 0.3917, "step": 1824 }, { "ETA": 2.45, "epoch": 0.5869110789515999, "fp16_scale": 1.0, "global_step": 1825, "grad_norm": 1.9983846520932798, "learning_rate": 7.69215149285666e-07, "loss": 0.4428, "step": 1825 }, { "ETA": 2.45, "epoch": 0.5872326740633542, "fp16_scale": 1.0, "global_step": 1826, "grad_norm": 1.9755229060951203, "learning_rate": 7.682014157996154e-07, "loss": 0.3363, "step": 1826 }, { "ETA": 2.45, "epoch": 0.5875542691751086, "fp16_scale": 1.0, "global_step": 1827, "grad_norm": 1.8847486667873525, "learning_rate": 7.671879339861161e-07, "loss": 0.3591, "step": 1827 }, { "ETA": 2.45, "epoch": 0.5878758642868629, "fp16_scale": 1.0, "global_step": 1828, "grad_norm": 1.7688928639790573, "learning_rate": 7.661747049455443e-07, "loss": 0.3714, "step": 1828 }, { "ETA": 2.44, "epoch": 0.5881974593986171, "fp16_scale": 1.0, "global_step": 1829, "grad_norm": 1.8099458098342016, "learning_rate": 7.65161729778001e-07, "loss": 0.3844, "step": 1829 }, { "ETA": 2.44, "epoch": 0.5885190545103715, "fp16_scale": 1.0, "global_step": 1830, "grad_norm": 2.010363991501376, "learning_rate": 7.641490095833125e-07, "loss": 0.4396, "step": 1830 }, { "ETA": 2.44, "epoch": 0.5888406496221258, "fp16_scale": 1.0, "global_step": 1831, "grad_norm": 2.270810950706374, "learning_rate": 7.631365454610273e-07, "loss": 0.3659, "step": 1831 }, { "ETA": 2.44, "epoch": 0.58916224473388, "fp16_scale": 1.0, "global_step": 1832, "grad_norm": 1.9878661551203023, "learning_rate": 7.621243385104159e-07, "loss": 0.485, "step": 1832 }, { "ETA": 2.44, "epoch": 0.5894838398456343, "fp16_scale": 1.0, "global_step": 1833, "grad_norm": 2.0226311551404885, "learning_rate": 7.611123898304708e-07, "loss": 0.3489, "step": 1833 }, { "ETA": 2.43, "epoch": 0.5898054349573887, "fp16_scale": 1.0, "global_step": 1834, "grad_norm": 1.9576918252737419, "learning_rate": 7.601007005199021e-07, "loss": 0.4721, "step": 1834 }, { "ETA": 2.43, "epoch": 0.5901270300691429, "fp16_scale": 1.0, "global_step": 1835, "grad_norm": 2.0687996999559446, "learning_rate": 7.590892716771407e-07, "loss": 0.4394, "step": 1835 }, { "ETA": 2.43, "epoch": 0.5904486251808972, "fp16_scale": 1.0, "global_step": 1836, "grad_norm": 2.2151135086262377, "learning_rate": 7.580781044003324e-07, "loss": 0.4364, "step": 1836 }, { "ETA": 2.43, "epoch": 0.5907702202926516, "fp16_scale": 1.0, "global_step": 1837, "grad_norm": 2.152376591059747, "learning_rate": 7.570671997873404e-07, "loss": 0.3745, "step": 1837 }, { "ETA": 2.42, "epoch": 0.5910918154044058, "fp16_scale": 1.0, "global_step": 1838, "grad_norm": 2.4006392471702873, "learning_rate": 7.560565589357426e-07, "loss": 0.3809, "step": 1838 }, { "ETA": 2.42, "epoch": 0.5914134105161601, "fp16_scale": 1.0, "global_step": 1839, "grad_norm": 1.9557808284807419, "learning_rate": 7.550461829428296e-07, "loss": 0.3583, "step": 1839 }, { "ETA": 2.42, "epoch": 0.5917350056279145, "fp16_scale": 1.0, "global_step": 1840, "grad_norm": 2.18443290175363, "learning_rate": 7.540360729056058e-07, "loss": 0.4636, "step": 1840 }, { "ETA": 2.42, "epoch": 0.5920566007396687, "fp16_scale": 1.0, "global_step": 1841, "grad_norm": 1.8793855185965898, "learning_rate": 7.530262299207856e-07, "loss": 0.5123, "step": 1841 }, { "ETA": 2.42, "epoch": 0.592378195851423, "fp16_scale": 1.0, "global_step": 1842, "grad_norm": 2.0013663810307314, "learning_rate": 7.520166550847944e-07, "loss": 0.3656, "step": 1842 }, { "ETA": 2.42, "epoch": 0.5926997909631774, "fp16_scale": 1.0, "global_step": 1843, "grad_norm": 2.063186431494864, "learning_rate": 7.510073494937662e-07, "loss": 0.4559, "step": 1843 }, { "ETA": 2.41, "epoch": 0.5930213860749317, "fp16_scale": 1.0, "global_step": 1844, "grad_norm": 1.9613673142244223, "learning_rate": 7.499983142435418e-07, "loss": 0.4223, "step": 1844 }, { "ETA": 2.41, "epoch": 0.5933429811866859, "fp16_scale": 1.0, "global_step": 1845, "grad_norm": 1.884823087504064, "learning_rate": 7.489895504296697e-07, "loss": 0.3825, "step": 1845 }, { "ETA": 2.41, "epoch": 0.5936645762984403, "fp16_scale": 1.0, "global_step": 1846, "grad_norm": 1.9092158504687446, "learning_rate": 7.47981059147403e-07, "loss": 0.4311, "step": 1846 }, { "ETA": 2.41, "epoch": 0.5939861714101946, "fp16_scale": 1.0, "global_step": 1847, "grad_norm": 2.0335014532424953, "learning_rate": 7.469728414916994e-07, "loss": 0.4201, "step": 1847 }, { "ETA": 2.41, "epoch": 0.5943077665219488, "fp16_scale": 1.0, "global_step": 1848, "grad_norm": 2.401168333894695, "learning_rate": 7.45964898557219e-07, "loss": 0.4907, "step": 1848 }, { "ETA": 2.4, "epoch": 0.5946293616337032, "fp16_scale": 1.0, "global_step": 1849, "grad_norm": 1.969279338138325, "learning_rate": 7.449572314383236e-07, "loss": 0.4598, "step": 1849 }, { "ETA": 2.4, "epoch": 0.5949509567454575, "fp16_scale": 1.0, "global_step": 1850, "grad_norm": 1.7639705258674894, "learning_rate": 7.439498412290762e-07, "loss": 0.3923, "step": 1850 }, { "ETA": 2.4, "epoch": 0.5952725518572117, "fp16_scale": 1.0, "global_step": 1851, "grad_norm": 2.0034800166056272, "learning_rate": 7.429427290232384e-07, "loss": 0.4317, "step": 1851 }, { "ETA": 2.4, "epoch": 0.5955941469689661, "fp16_scale": 1.0, "global_step": 1852, "grad_norm": 2.0895070852648097, "learning_rate": 7.419358959142708e-07, "loss": 0.4073, "step": 1852 }, { "ETA": 2.4, "epoch": 0.5959157420807204, "fp16_scale": 1.0, "global_step": 1853, "grad_norm": 1.9109434808336507, "learning_rate": 7.409293429953296e-07, "loss": 0.3811, "step": 1853 }, { "ETA": 2.39, "epoch": 0.5962373371924746, "fp16_scale": 1.0, "global_step": 1854, "grad_norm": 1.8864076099936178, "learning_rate": 7.399230713592683e-07, "loss": 0.4362, "step": 1854 }, { "ETA": 2.39, "epoch": 0.596558932304229, "fp16_scale": 1.0, "global_step": 1855, "grad_norm": 1.8161771650077314, "learning_rate": 7.389170820986345e-07, "loss": 0.3522, "step": 1855 }, { "ETA": 2.39, "epoch": 0.5968805274159833, "fp16_scale": 1.0, "global_step": 1856, "grad_norm": 2.1497221707520264, "learning_rate": 7.379113763056679e-07, "loss": 0.5376, "step": 1856 }, { "ETA": 2.39, "epoch": 0.5972021225277376, "fp16_scale": 1.0, "global_step": 1857, "grad_norm": 2.0648782697948525, "learning_rate": 7.369059550723031e-07, "loss": 0.4663, "step": 1857 }, { "ETA": 2.39, "epoch": 0.5975237176394919, "fp16_scale": 1.0, "global_step": 1858, "grad_norm": 1.9443453860732576, "learning_rate": 7.359008194901632e-07, "loss": 0.4608, "step": 1858 }, { "ETA": 2.38, "epoch": 0.5978453127512462, "fp16_scale": 1.0, "global_step": 1859, "grad_norm": 1.9649638216733079, "learning_rate": 7.348959706505626e-07, "loss": 0.4703, "step": 1859 }, { "ETA": 2.38, "epoch": 0.5981669078630005, "fp16_scale": 1.0, "global_step": 1860, "grad_norm": 1.8028674664504476, "learning_rate": 7.338914096445041e-07, "loss": 0.4174, "step": 1860 }, { "ETA": 2.38, "epoch": 0.5984885029747548, "fp16_scale": 1.0, "global_step": 1861, "grad_norm": 1.9784286461654301, "learning_rate": 7.328871375626771e-07, "loss": 0.4348, "step": 1861 }, { "ETA": 2.38, "epoch": 0.5988100980865091, "fp16_scale": 1.0, "global_step": 1862, "grad_norm": 1.9713681338338356, "learning_rate": 7.31883155495459e-07, "loss": 0.4722, "step": 1862 }, { "ETA": 2.38, "epoch": 0.5991316931982634, "fp16_scale": 1.0, "global_step": 1863, "grad_norm": 2.128705032392235, "learning_rate": 7.308794645329105e-07, "loss": 0.3703, "step": 1863 }, { "ETA": 2.38, "epoch": 0.5994532883100177, "fp16_scale": 1.0, "global_step": 1864, "grad_norm": 1.863216906100975, "learning_rate": 7.298760657647778e-07, "loss": 0.426, "step": 1864 }, { "ETA": 2.37, "epoch": 0.599774883421772, "fp16_scale": 1.0, "global_step": 1865, "grad_norm": 2.073626712959002, "learning_rate": 7.288729602804891e-07, "loss": 0.3698, "step": 1865 }, { "ETA": 2.37, "epoch": 0.6000964785335263, "fp16_scale": 1.0, "global_step": 1866, "grad_norm": 2.121206658616612, "learning_rate": 7.278701491691537e-07, "loss": 0.4141, "step": 1866 }, { "ETA": 2.37, "epoch": 0.6004180736452805, "fp16_scale": 1.0, "global_step": 1867, "grad_norm": 1.9570534911328257, "learning_rate": 7.268676335195623e-07, "loss": 0.3992, "step": 1867 }, { "ETA": 2.37, "epoch": 0.6007396687570349, "fp16_scale": 1.0, "global_step": 1868, "grad_norm": 1.830324613897496, "learning_rate": 7.258654144201839e-07, "loss": 0.3837, "step": 1868 }, { "ETA": 2.36, "epoch": 0.6010612638687892, "fp16_scale": 1.0, "global_step": 1869, "grad_norm": 2.1064898331486224, "learning_rate": 7.248634929591667e-07, "loss": 0.4326, "step": 1869 }, { "ETA": 2.36, "epoch": 0.6013828589805434, "fp16_scale": 1.0, "global_step": 1870, "grad_norm": 1.9603670404047848, "learning_rate": 7.238618702243338e-07, "loss": 0.3825, "step": 1870 }, { "ETA": 2.36, "epoch": 0.6017044540922978, "fp16_scale": 1.0, "global_step": 1871, "grad_norm": 1.717941156944836, "learning_rate": 7.228605473031866e-07, "loss": 0.3497, "step": 1871 }, { "ETA": 2.36, "epoch": 0.6020260492040521, "fp16_scale": 1.0, "global_step": 1872, "grad_norm": 1.8542634815483454, "learning_rate": 7.218595252828985e-07, "loss": 0.4263, "step": 1872 }, { "ETA": 2.36, "epoch": 0.6023476443158065, "fp16_scale": 1.0, "global_step": 1873, "grad_norm": 1.9691349002044445, "learning_rate": 7.208588052503173e-07, "loss": 0.4024, "step": 1873 }, { "ETA": 2.35, "epoch": 0.6026692394275607, "fp16_scale": 1.0, "global_step": 1874, "grad_norm": 2.1285946804020965, "learning_rate": 7.198583882919636e-07, "loss": 0.4332, "step": 1874 }, { "ETA": 2.35, "epoch": 0.602990834539315, "fp16_scale": 1.0, "global_step": 1875, "grad_norm": 2.10217432293654, "learning_rate": 7.188582754940273e-07, "loss": 0.3837, "step": 1875 }, { "ETA": 2.35, "epoch": 0.6033124296510693, "fp16_scale": 1.0, "global_step": 1876, "grad_norm": 1.8823643332979483, "learning_rate": 7.178584679423694e-07, "loss": 0.3772, "step": 1876 }, { "ETA": 2.35, "epoch": 0.6036340247628236, "fp16_scale": 1.0, "global_step": 1877, "grad_norm": 2.082038138133417, "learning_rate": 7.168589667225191e-07, "loss": 0.504, "step": 1877 }, { "ETA": 2.35, "epoch": 0.6039556198745779, "fp16_scale": 1.0, "global_step": 1878, "grad_norm": 2.0510651597553493, "learning_rate": 7.158597729196723e-07, "loss": 0.4473, "step": 1878 }, { "ETA": 2.35, "epoch": 0.6042772149863322, "fp16_scale": 1.0, "global_step": 1879, "grad_norm": 1.863623728356378, "learning_rate": 7.14860887618693e-07, "loss": 0.4507, "step": 1879 }, { "ETA": 2.34, "epoch": 0.6045988100980865, "fp16_scale": 1.0, "global_step": 1880, "grad_norm": 1.740905279842798, "learning_rate": 7.138623119041079e-07, "loss": 0.4278, "step": 1880 }, { "ETA": 2.34, "epoch": 0.6049204052098408, "fp16_scale": 1.0, "global_step": 1881, "grad_norm": 1.9298202082804343, "learning_rate": 7.12864046860109e-07, "loss": 0.4082, "step": 1881 }, { "ETA": 2.34, "epoch": 0.6052420003215951, "fp16_scale": 1.0, "global_step": 1882, "grad_norm": 2.0284492390917843, "learning_rate": 7.118660935705509e-07, "loss": 0.4157, "step": 1882 }, { "ETA": 2.34, "epoch": 0.6055635954333494, "fp16_scale": 1.0, "global_step": 1883, "grad_norm": 2.291831988981119, "learning_rate": 7.108684531189496e-07, "loss": 0.4533, "step": 1883 }, { "ETA": 2.34, "epoch": 0.6058851905451037, "fp16_scale": 1.0, "global_step": 1884, "grad_norm": 1.6440238224138444, "learning_rate": 7.09871126588481e-07, "loss": 0.3447, "step": 1884 }, { "ETA": 2.33, "epoch": 0.606206785656858, "fp16_scale": 1.0, "global_step": 1885, "grad_norm": 2.024599681836863, "learning_rate": 7.088741150619803e-07, "loss": 0.3901, "step": 1885 }, { "ETA": 2.33, "epoch": 0.6065283807686123, "fp16_scale": 1.0, "global_step": 1886, "grad_norm": 1.8890920986588906, "learning_rate": 7.078774196219413e-07, "loss": 0.457, "step": 1886 }, { "ETA": 2.33, "epoch": 0.6068499758803666, "fp16_scale": 1.0, "global_step": 1887, "grad_norm": 1.970377818957128, "learning_rate": 7.06881041350514e-07, "loss": 0.4278, "step": 1887 }, { "ETA": 2.33, "epoch": 0.6071715709921209, "fp16_scale": 1.0, "global_step": 1888, "grad_norm": 1.9325448841424304, "learning_rate": 7.058849813295049e-07, "loss": 0.4125, "step": 1888 }, { "ETA": 2.33, "epoch": 0.6074931661038753, "fp16_scale": 1.0, "global_step": 1889, "grad_norm": 1.964317539985117, "learning_rate": 7.048892406403733e-07, "loss": 0.4113, "step": 1889 }, { "ETA": 2.32, "epoch": 0.6078147612156295, "fp16_scale": 1.0, "global_step": 1890, "grad_norm": 1.8482170685923327, "learning_rate": 7.03893820364233e-07, "loss": 0.3398, "step": 1890 }, { "ETA": 2.32, "epoch": 0.6081363563273838, "fp16_scale": 1.0, "global_step": 1891, "grad_norm": 2.026553743125374, "learning_rate": 7.028987215818505e-07, "loss": 0.424, "step": 1891 }, { "ETA": 2.32, "epoch": 0.6084579514391382, "fp16_scale": 1.0, "global_step": 1892, "grad_norm": 2.1040219322450895, "learning_rate": 7.019039453736413e-07, "loss": 0.3661, "step": 1892 }, { "ETA": 2.32, "epoch": 0.6087795465508924, "fp16_scale": 1.0, "global_step": 1893, "grad_norm": 1.8607005041624993, "learning_rate": 7.009094928196727e-07, "loss": 0.4016, "step": 1893 }, { "ETA": 2.32, "epoch": 0.6091011416626467, "fp16_scale": 1.0, "global_step": 1894, "grad_norm": 2.033089810273623, "learning_rate": 6.999153649996594e-07, "loss": 0.3914, "step": 1894 }, { "ETA": 2.31, "epoch": 0.6094227367744011, "fp16_scale": 1.0, "global_step": 1895, "grad_norm": 1.980625160789773, "learning_rate": 6.989215629929637e-07, "loss": 0.4594, "step": 1895 }, { "ETA": 2.31, "epoch": 0.6097443318861553, "fp16_scale": 1.0, "global_step": 1896, "grad_norm": 1.83085786985307, "learning_rate": 6.979280878785947e-07, "loss": 0.3964, "step": 1896 }, { "ETA": 2.31, "epoch": 0.6100659269979096, "fp16_scale": 1.0, "global_step": 1897, "grad_norm": 1.9794632495257205, "learning_rate": 6.969349407352056e-07, "loss": 0.4243, "step": 1897 }, { "ETA": 2.31, "epoch": 0.610387522109664, "fp16_scale": 1.0, "global_step": 1898, "grad_norm": 2.0475496147883527, "learning_rate": 6.959421226410947e-07, "loss": 0.3993, "step": 1898 }, { "ETA": 2.31, "epoch": 0.6107091172214182, "fp16_scale": 1.0, "global_step": 1899, "grad_norm": 2.007163917754024, "learning_rate": 6.949496346742017e-07, "loss": 0.4549, "step": 1899 }, { "ETA": 2.3, "epoch": 0.6110307123331725, "fp16_scale": 1.0, "global_step": 1900, "grad_norm": 2.073288861320127, "learning_rate": 6.939574779121093e-07, "loss": 0.4442, "step": 1900 }, { "ETA": 2.3, "epoch": 0.6113523074449269, "fp16_scale": 1.0, "global_step": 1901, "grad_norm": 2.0703613897445434, "learning_rate": 6.929656534320397e-07, "loss": 0.3771, "step": 1901 }, { "ETA": 2.3, "epoch": 0.6116739025566812, "fp16_scale": 1.0, "global_step": 1902, "grad_norm": 2.1497329821966957, "learning_rate": 6.919741623108542e-07, "loss": 0.4377, "step": 1902 }, { "ETA": 2.3, "epoch": 0.6119954976684354, "fp16_scale": 1.0, "global_step": 1903, "grad_norm": 2.0351757605739116, "learning_rate": 6.909830056250526e-07, "loss": 0.5291, "step": 1903 }, { "ETA": 2.3, "epoch": 0.6123170927801898, "fp16_scale": 1.0, "global_step": 1904, "grad_norm": 2.07226105653468, "learning_rate": 6.899921844507714e-07, "loss": 0.4179, "step": 1904 }, { "ETA": 2.29, "epoch": 0.6126386878919441, "fp16_scale": 1.0, "global_step": 1905, "grad_norm": 1.9487416991930222, "learning_rate": 6.890016998637836e-07, "loss": 0.3382, "step": 1905 }, { "ETA": 2.29, "epoch": 0.6129602830036983, "fp16_scale": 1.0, "global_step": 1906, "grad_norm": 2.0221826982488005, "learning_rate": 6.880115529394952e-07, "loss": 0.4389, "step": 1906 }, { "ETA": 2.29, "epoch": 0.6132818781154526, "fp16_scale": 1.0, "global_step": 1907, "grad_norm": 1.9054340759625708, "learning_rate": 6.870217447529463e-07, "loss": 0.4863, "step": 1907 }, { "ETA": 2.29, "epoch": 0.613603473227207, "fp16_scale": 1.0, "global_step": 1908, "grad_norm": 2.0179621149474336, "learning_rate": 6.860322763788101e-07, "loss": 0.4488, "step": 1908 }, { "ETA": 2.29, "epoch": 0.6139250683389612, "fp16_scale": 1.0, "global_step": 1909, "grad_norm": 2.098744115240401, "learning_rate": 6.850431488913895e-07, "loss": 0.429, "step": 1909 }, { "ETA": 2.29, "epoch": 0.6142466634507155, "fp16_scale": 1.0, "global_step": 1910, "grad_norm": 2.0395094518502312, "learning_rate": 6.840543633646186e-07, "loss": 0.4948, "step": 1910 }, { "ETA": 2.28, "epoch": 0.6145682585624699, "fp16_scale": 1.0, "global_step": 1911, "grad_norm": 1.7849757703820304, "learning_rate": 6.830659208720587e-07, "loss": 0.4736, "step": 1911 }, { "ETA": 2.28, "epoch": 0.6148898536742241, "fp16_scale": 1.0, "global_step": 1912, "grad_norm": 2.15187119824761, "learning_rate": 6.820778224868998e-07, "loss": 0.4395, "step": 1912 }, { "ETA": 2.28, "epoch": 0.6152114487859784, "fp16_scale": 1.0, "global_step": 1913, "grad_norm": 1.8579408402211197, "learning_rate": 6.810900692819581e-07, "loss": 0.4385, "step": 1913 }, { "ETA": 2.28, "epoch": 0.6155330438977328, "fp16_scale": 1.0, "global_step": 1914, "grad_norm": 1.8482655430633033, "learning_rate": 6.801026623296744e-07, "loss": 0.4306, "step": 1914 }, { "ETA": 2.28, "epoch": 0.615854639009487, "fp16_scale": 1.0, "global_step": 1915, "grad_norm": 1.841883593780279, "learning_rate": 6.791156027021147e-07, "loss": 0.4127, "step": 1915 }, { "ETA": 2.28, "epoch": 0.6161762341212413, "fp16_scale": 1.0, "global_step": 1916, "grad_norm": 2.080739819301582, "learning_rate": 6.781288914709665e-07, "loss": 0.4478, "step": 1916 }, { "ETA": 2.27, "epoch": 0.6164978292329957, "fp16_scale": 1.0, "global_step": 1917, "grad_norm": 1.735666030974362, "learning_rate": 6.771425297075404e-07, "loss": 0.3685, "step": 1917 }, { "ETA": 2.27, "epoch": 0.61681942434475, "fp16_scale": 1.0, "global_step": 1918, "grad_norm": 2.2135331991280776, "learning_rate": 6.76156518482767e-07, "loss": 0.4302, "step": 1918 }, { "ETA": 2.27, "epoch": 0.6171410194565042, "fp16_scale": 1.0, "global_step": 1919, "grad_norm": 2.060755616454066, "learning_rate": 6.751708588671954e-07, "loss": 0.4693, "step": 1919 }, { "ETA": 2.27, "epoch": 0.6174626145682586, "fp16_scale": 1.0, "global_step": 1920, "grad_norm": 1.8526713368355843, "learning_rate": 6.741855519309947e-07, "loss": 0.4548, "step": 1920 }, { "ETA": 2.26, "epoch": 0.6177842096800129, "fp16_scale": 1.0, "global_step": 1921, "grad_norm": 2.0922034155667544, "learning_rate": 6.732005987439493e-07, "loss": 0.4153, "step": 1921 }, { "ETA": 2.26, "epoch": 0.6181058047917671, "fp16_scale": 1.0, "global_step": 1922, "grad_norm": 1.9621669577526282, "learning_rate": 6.722160003754616e-07, "loss": 0.433, "step": 1922 }, { "ETA": 2.26, "epoch": 0.6184273999035215, "fp16_scale": 1.0, "global_step": 1923, "grad_norm": 2.1596970828730875, "learning_rate": 6.712317578945463e-07, "loss": 0.4813, "step": 1923 }, { "ETA": 2.26, "epoch": 0.6187489950152758, "fp16_scale": 1.0, "global_step": 1924, "grad_norm": 1.969432853605627, "learning_rate": 6.702478723698335e-07, "loss": 0.5096, "step": 1924 }, { "ETA": 2.26, "epoch": 0.61907059012703, "fp16_scale": 1.0, "global_step": 1925, "grad_norm": 1.942849372023299, "learning_rate": 6.692643448695653e-07, "loss": 0.3988, "step": 1925 }, { "ETA": 2.26, "epoch": 0.6193921852387844, "fp16_scale": 1.0, "global_step": 1926, "grad_norm": 2.0304285304895644, "learning_rate": 6.682811764615946e-07, "loss": 0.4563, "step": 1926 }, { "ETA": 2.25, "epoch": 0.6197137803505387, "fp16_scale": 1.0, "global_step": 1927, "grad_norm": 1.9426106278397655, "learning_rate": 6.672983682133854e-07, "loss": 0.3837, "step": 1927 }, { "ETA": 2.25, "epoch": 0.6200353754622929, "fp16_scale": 1.0, "global_step": 1928, "grad_norm": 1.9978059912858224, "learning_rate": 6.663159211920093e-07, "loss": 0.4268, "step": 1928 }, { "ETA": 2.25, "epoch": 0.6203569705740473, "fp16_scale": 1.0, "global_step": 1929, "grad_norm": 1.8887839782993492, "learning_rate": 6.653338364641471e-07, "loss": 0.4658, "step": 1929 }, { "ETA": 2.25, "epoch": 0.6206785656858016, "fp16_scale": 1.0, "global_step": 1930, "grad_norm": 2.172058250104252, "learning_rate": 6.643521150960854e-07, "loss": 0.4601, "step": 1930 }, { "ETA": 2.25, "epoch": 0.6210001607975558, "fp16_scale": 1.0, "global_step": 1931, "grad_norm": 1.942563500353798, "learning_rate": 6.633707581537158e-07, "loss": 0.4898, "step": 1931 }, { "ETA": 2.25, "epoch": 0.6213217559093102, "fp16_scale": 1.0, "global_step": 1932, "grad_norm": 1.6498254195120632, "learning_rate": 6.623897667025363e-07, "loss": 0.4492, "step": 1932 }, { "ETA": 2.24, "epoch": 0.6216433510210645, "fp16_scale": 1.0, "global_step": 1933, "grad_norm": 2.3993883773088918, "learning_rate": 6.614091418076452e-07, "loss": 0.3456, "step": 1933 }, { "ETA": 2.24, "epoch": 0.6219649461328188, "fp16_scale": 1.0, "global_step": 1934, "grad_norm": 1.8617557065988062, "learning_rate": 6.604288845337452e-07, "loss": 0.4768, "step": 1934 }, { "ETA": 2.24, "epoch": 0.622286541244573, "fp16_scale": 1.0, "global_step": 1935, "grad_norm": 1.893530852376071, "learning_rate": 6.59448995945139e-07, "loss": 0.3804, "step": 1935 }, { "ETA": 2.24, "epoch": 0.6226081363563274, "fp16_scale": 1.0, "global_step": 1936, "grad_norm": 2.0786509523274965, "learning_rate": 6.584694771057284e-07, "loss": 0.3875, "step": 1936 }, { "ETA": 2.24, "epoch": 0.6229297314680817, "fp16_scale": 1.0, "global_step": 1937, "grad_norm": 1.912223968505479, "learning_rate": 6.574903290790149e-07, "loss": 0.4234, "step": 1937 }, { "ETA": 2.23, "epoch": 0.623251326579836, "fp16_scale": 1.0, "global_step": 1938, "grad_norm": 1.8817215013769721, "learning_rate": 6.56511552928096e-07, "loss": 0.456, "step": 1938 }, { "ETA": 2.23, "epoch": 0.6235729216915903, "fp16_scale": 1.0, "global_step": 1939, "grad_norm": 2.3199546637651274, "learning_rate": 6.555331497156671e-07, "loss": 0.4695, "step": 1939 }, { "ETA": 2.23, "epoch": 0.6238945168033446, "fp16_scale": 1.0, "global_step": 1940, "grad_norm": 1.8956014802235357, "learning_rate": 6.545551205040173e-07, "loss": 0.5008, "step": 1940 }, { "ETA": 2.23, "epoch": 0.6242161119150988, "fp16_scale": 1.0, "global_step": 1941, "grad_norm": 2.0032353917606947, "learning_rate": 6.535774663550309e-07, "loss": 0.4442, "step": 1941 }, { "ETA": 2.23, "epoch": 0.6245377070268532, "fp16_scale": 1.0, "global_step": 1942, "grad_norm": 1.757997986879624, "learning_rate": 6.526001883301832e-07, "loss": 0.3738, "step": 1942 }, { "ETA": 2.22, "epoch": 0.6248593021386075, "fp16_scale": 1.0, "global_step": 1943, "grad_norm": 1.761485498037052, "learning_rate": 6.516232874905427e-07, "loss": 0.4376, "step": 1943 }, { "ETA": 2.22, "epoch": 0.6251808972503617, "fp16_scale": 1.0, "global_step": 1944, "grad_norm": 1.9214973154495354, "learning_rate": 6.506467648967683e-07, "loss": 0.4894, "step": 1944 }, { "ETA": 2.22, "epoch": 0.6255024923621161, "fp16_scale": 1.0, "global_step": 1945, "grad_norm": 1.857485549019393, "learning_rate": 6.496706216091065e-07, "loss": 0.4339, "step": 1945 }, { "ETA": 2.22, "epoch": 0.6258240874738704, "fp16_scale": 1.0, "global_step": 1946, "grad_norm": 1.9873136616166283, "learning_rate": 6.486948586873948e-07, "loss": 0.3943, "step": 1946 }, { "ETA": 2.22, "epoch": 0.6261456825856248, "fp16_scale": 1.0, "global_step": 1947, "grad_norm": 1.9182273871188733, "learning_rate": 6.477194771910553e-07, "loss": 0.417, "step": 1947 }, { "ETA": 2.21, "epoch": 0.626467277697379, "fp16_scale": 1.0, "global_step": 1948, "grad_norm": 2.1600667481627926, "learning_rate": 6.467444781790966e-07, "loss": 0.4486, "step": 1948 }, { "ETA": 2.21, "epoch": 0.6267888728091333, "fp16_scale": 1.0, "global_step": 1949, "grad_norm": 1.7982034937822307, "learning_rate": 6.457698627101131e-07, "loss": 0.4166, "step": 1949 }, { "ETA": 2.21, "epoch": 0.6271104679208876, "fp16_scale": 1.0, "global_step": 1950, "grad_norm": 2.0885425256491366, "learning_rate": 6.447956318422811e-07, "loss": 0.3997, "step": 1950 }, { "ETA": 2.21, "epoch": 0.6274320630326419, "fp16_scale": 1.0, "global_step": 1951, "grad_norm": 1.8681279298899824, "learning_rate": 6.438217866333607e-07, "loss": 0.4551, "step": 1951 }, { "ETA": 2.21, "epoch": 0.6277536581443962, "fp16_scale": 1.0, "global_step": 1952, "grad_norm": 1.8907531289709132, "learning_rate": 6.428483281406927e-07, "loss": 0.449, "step": 1952 }, { "ETA": 2.2, "epoch": 0.6280752532561505, "fp16_scale": 1.0, "global_step": 1953, "grad_norm": 1.7998942403187896, "learning_rate": 6.418752574211972e-07, "loss": 0.4732, "step": 1953 }, { "ETA": 2.2, "epoch": 0.6283968483679048, "fp16_scale": 1.0, "global_step": 1954, "grad_norm": 1.883198087188713, "learning_rate": 6.409025755313756e-07, "loss": 0.4562, "step": 1954 }, { "ETA": 2.2, "epoch": 0.6287184434796591, "fp16_scale": 1.0, "global_step": 1955, "grad_norm": 1.975106498305029, "learning_rate": 6.399302835273046e-07, "loss": 0.4571, "step": 1955 }, { "ETA": 2.2, "epoch": 0.6290400385914134, "fp16_scale": 1.0, "global_step": 1956, "grad_norm": 1.8604538136213093, "learning_rate": 6.38958382464639e-07, "loss": 0.3717, "step": 1956 }, { "ETA": 2.2, "epoch": 0.6293616337031677, "fp16_scale": 1.0, "global_step": 1957, "grad_norm": 1.8549947953647987, "learning_rate": 6.379868733986088e-07, "loss": 0.5051, "step": 1957 }, { "ETA": 2.2, "epoch": 0.629683228814922, "fp16_scale": 1.0, "global_step": 1958, "grad_norm": 1.7524149471514499, "learning_rate": 6.370157573840187e-07, "loss": 0.4633, "step": 1958 }, { "ETA": 2.19, "epoch": 0.6300048239266763, "fp16_scale": 1.0, "global_step": 1959, "grad_norm": 1.9221223955013331, "learning_rate": 6.360450354752458e-07, "loss": 0.4509, "step": 1959 }, { "ETA": 2.19, "epoch": 0.6303264190384306, "fp16_scale": 1.0, "global_step": 1960, "grad_norm": 2.194917362984832, "learning_rate": 6.3507470872624e-07, "loss": 0.4769, "step": 1960 }, { "ETA": 2.19, "epoch": 0.6306480141501849, "fp16_scale": 1.0, "global_step": 1961, "grad_norm": 1.9024229881646617, "learning_rate": 6.341047781905222e-07, "loss": 0.4523, "step": 1961 }, { "ETA": 2.19, "epoch": 0.6309696092619392, "fp16_scale": 1.0, "global_step": 1962, "grad_norm": 1.8615673024806854, "learning_rate": 6.331352449211826e-07, "loss": 0.4447, "step": 1962 }, { "ETA": 2.19, "epoch": 0.6312912043736936, "fp16_scale": 1.0, "global_step": 1963, "grad_norm": 1.793802394033057, "learning_rate": 6.321661099708811e-07, "loss": 0.4711, "step": 1963 }, { "ETA": 2.18, "epoch": 0.6316127994854478, "fp16_scale": 1.0, "global_step": 1964, "grad_norm": 2.1104973254266373, "learning_rate": 6.311973743918437e-07, "loss": 0.3755, "step": 1964 }, { "ETA": 2.18, "epoch": 0.6319343945972021, "fp16_scale": 1.0, "global_step": 1965, "grad_norm": 2.1964254451114145, "learning_rate": 6.302290392358635e-07, "loss": 0.5188, "step": 1965 }, { "ETA": 2.18, "epoch": 0.6322559897089565, "fp16_scale": 1.0, "global_step": 1966, "grad_norm": 1.751616382161262, "learning_rate": 6.292611055542996e-07, "loss": 0.3545, "step": 1966 }, { "ETA": 2.18, "epoch": 0.6325775848207107, "fp16_scale": 1.0, "global_step": 1967, "grad_norm": 2.1647898881601546, "learning_rate": 6.282935743980735e-07, "loss": 0.4911, "step": 1967 }, { "ETA": 2.18, "epoch": 0.632899179932465, "fp16_scale": 1.0, "global_step": 1968, "grad_norm": 1.949564451269776, "learning_rate": 6.273264468176715e-07, "loss": 0.3563, "step": 1968 }, { "ETA": 2.17, "epoch": 0.6332207750442194, "fp16_scale": 1.0, "global_step": 1969, "grad_norm": 2.1152615023556467, "learning_rate": 6.263597238631404e-07, "loss": 0.4673, "step": 1969 }, { "ETA": 2.17, "epoch": 0.6335423701559736, "fp16_scale": 1.0, "global_step": 1970, "grad_norm": 2.0050156477818835, "learning_rate": 6.253934065840879e-07, "loss": 0.4085, "step": 1970 }, { "ETA": 2.17, "epoch": 0.6338639652677279, "fp16_scale": 1.0, "global_step": 1971, "grad_norm": 2.095570426093439, "learning_rate": 6.244274960296823e-07, "loss": 0.4026, "step": 1971 }, { "ETA": 2.17, "epoch": 0.6341855603794823, "fp16_scale": 1.0, "global_step": 1972, "grad_norm": 2.1479747684036172, "learning_rate": 6.234619932486485e-07, "loss": 0.4654, "step": 1972 }, { "ETA": 2.17, "epoch": 0.6345071554912365, "fp16_scale": 1.0, "global_step": 1973, "grad_norm": 1.8034781534833133, "learning_rate": 6.224968992892701e-07, "loss": 0.4604, "step": 1973 }, { "ETA": 2.16, "epoch": 0.6348287506029908, "fp16_scale": 1.0, "global_step": 1974, "grad_norm": 1.811041843738487, "learning_rate": 6.215322151993863e-07, "loss": 0.4874, "step": 1974 }, { "ETA": 2.16, "epoch": 0.6351503457147452, "fp16_scale": 1.0, "global_step": 1975, "grad_norm": 1.9841854199069409, "learning_rate": 6.205679420263916e-07, "loss": 0.4831, "step": 1975 }, { "ETA": 2.16, "epoch": 0.6354719408264994, "fp16_scale": 1.0, "global_step": 1976, "grad_norm": 1.9980204524170417, "learning_rate": 6.196040808172343e-07, "loss": 0.4439, "step": 1976 }, { "ETA": 2.16, "epoch": 0.6357935359382537, "fp16_scale": 1.0, "global_step": 1977, "grad_norm": 1.9608922773188318, "learning_rate": 6.186406326184143e-07, "loss": 0.4321, "step": 1977 }, { "ETA": 2.16, "epoch": 0.636115131050008, "fp16_scale": 1.0, "global_step": 1978, "grad_norm": 2.2580222602067272, "learning_rate": 6.176775984759847e-07, "loss": 0.4243, "step": 1978 }, { "ETA": 2.16, "epoch": 0.6364367261617624, "fp16_scale": 1.0, "global_step": 1979, "grad_norm": 2.1119575206185974, "learning_rate": 6.167149794355481e-07, "loss": 0.4395, "step": 1979 }, { "ETA": 2.15, "epoch": 0.6367583212735166, "fp16_scale": 1.0, "global_step": 1980, "grad_norm": 1.811932127171653, "learning_rate": 6.157527765422573e-07, "loss": 0.3301, "step": 1980 }, { "ETA": 2.15, "epoch": 0.637079916385271, "fp16_scale": 1.0, "global_step": 1981, "grad_norm": 1.9755538028177826, "learning_rate": 6.147909908408115e-07, "loss": 0.5009, "step": 1981 }, { "ETA": 2.15, "epoch": 0.6374015114970253, "fp16_scale": 1.0, "global_step": 1982, "grad_norm": 1.757935829368295, "learning_rate": 6.138296233754587e-07, "loss": 0.4297, "step": 1982 }, { "ETA": 2.15, "epoch": 0.6377231066087795, "fp16_scale": 1.0, "global_step": 1983, "grad_norm": 2.1585630597789818, "learning_rate": 6.128686751899924e-07, "loss": 0.4291, "step": 1983 }, { "ETA": 2.15, "epoch": 0.6380447017205338, "fp16_scale": 1.0, "global_step": 1984, "grad_norm": 1.9614757976606525, "learning_rate": 6.119081473277501e-07, "loss": 0.4499, "step": 1984 }, { "ETA": 2.14, "epoch": 0.6383662968322882, "fp16_scale": 1.0, "global_step": 1985, "grad_norm": 1.8461831674771794, "learning_rate": 6.109480408316143e-07, "loss": 0.361, "step": 1985 }, { "ETA": 2.14, "epoch": 0.6386878919440424, "fp16_scale": 1.0, "global_step": 1986, "grad_norm": 2.5933550939749392, "learning_rate": 6.099883567440081e-07, "loss": 0.486, "step": 1986 }, { "ETA": 2.14, "epoch": 0.6390094870557967, "fp16_scale": 1.0, "global_step": 1987, "grad_norm": 1.7600846185114918, "learning_rate": 6.090290961068978e-07, "loss": 0.4141, "step": 1987 }, { "ETA": 2.14, "epoch": 0.6393310821675511, "fp16_scale": 1.0, "global_step": 1988, "grad_norm": 2.1253603328467463, "learning_rate": 6.080702599617892e-07, "loss": 0.3644, "step": 1988 }, { "ETA": 2.14, "epoch": 0.6396526772793053, "fp16_scale": 1.0, "global_step": 1989, "grad_norm": 2.197696654861091, "learning_rate": 6.07111849349727e-07, "loss": 0.3995, "step": 1989 }, { "ETA": 2.13, "epoch": 0.6399742723910596, "fp16_scale": 1.0, "global_step": 1990, "grad_norm": 1.9931956613987192, "learning_rate": 6.061538653112941e-07, "loss": 0.4471, "step": 1990 }, { "ETA": 2.13, "epoch": 0.640295867502814, "fp16_scale": 1.0, "global_step": 1991, "grad_norm": 1.8791204448826448, "learning_rate": 6.051963088866101e-07, "loss": 0.3875, "step": 1991 }, { "ETA": 2.13, "epoch": 0.6406174626145683, "fp16_scale": 1.0, "global_step": 1992, "grad_norm": 1.865998253665651, "learning_rate": 6.042391811153309e-07, "loss": 0.4142, "step": 1992 }, { "ETA": 2.13, "epoch": 0.6409390577263225, "fp16_scale": 1.0, "global_step": 1993, "grad_norm": 2.0521484255436757, "learning_rate": 6.032824830366466e-07, "loss": 0.4343, "step": 1993 }, { "ETA": 2.13, "epoch": 0.6412606528380769, "fp16_scale": 1.0, "global_step": 1994, "grad_norm": 1.9804359524170014, "learning_rate": 6.023262156892801e-07, "loss": 0.3973, "step": 1994 }, { "ETA": 2.12, "epoch": 0.6415822479498312, "fp16_scale": 1.0, "global_step": 1995, "grad_norm": 2.1542457139304325, "learning_rate": 6.01370380111488e-07, "loss": 0.3889, "step": 1995 }, { "ETA": 2.12, "epoch": 0.6419038430615854, "fp16_scale": 1.0, "global_step": 1996, "grad_norm": 2.0882781621428017, "learning_rate": 6.004149773410568e-07, "loss": 0.4484, "step": 1996 }, { "ETA": 2.12, "epoch": 0.6422254381733398, "fp16_scale": 1.0, "global_step": 1997, "grad_norm": 2.234274236906816, "learning_rate": 5.994600084153043e-07, "loss": 0.4473, "step": 1997 }, { "ETA": 2.12, "epoch": 0.6425470332850941, "fp16_scale": 1.0, "global_step": 1998, "grad_norm": 2.0635888020273354, "learning_rate": 5.985054743710763e-07, "loss": 0.425, "step": 1998 }, { "ETA": 2.12, "epoch": 0.6428686283968483, "fp16_scale": 1.0, "global_step": 1999, "grad_norm": 1.8860137964659152, "learning_rate": 5.975513762447464e-07, "loss": 0.4515, "step": 1999 }, { "ETA": 2.11, "epoch": 0.6431902235086027, "fp16_scale": 1.0, "global_step": 2000, "grad_norm": 2.2500901908562296, "learning_rate": 5.965977150722159e-07, "loss": 0.3894, "step": 2000 }, { "ETA": 2.12, "epoch": 0.643511818620357, "fp16_scale": 1.0, "global_step": 2001, "grad_norm": 1.8436424869277004, "learning_rate": 5.956444918889107e-07, "loss": 0.3802, "step": 2001 }, { "ETA": 2.12, "epoch": 0.6438334137321112, "fp16_scale": 1.0, "global_step": 2002, "grad_norm": 2.054805311035249, "learning_rate": 5.946917077297819e-07, "loss": 0.4297, "step": 2002 }, { "ETA": 2.11, "epoch": 0.6441550088438656, "fp16_scale": 1.0, "global_step": 2003, "grad_norm": 1.7534913161624872, "learning_rate": 5.937393636293029e-07, "loss": 0.3728, "step": 2003 }, { "ETA": 2.11, "epoch": 0.6444766039556199, "fp16_scale": 1.0, "global_step": 2004, "grad_norm": 2.0300920903469537, "learning_rate": 5.927874606214704e-07, "loss": 0.4116, "step": 2004 }, { "ETA": 2.11, "epoch": 0.6447981990673741, "fp16_scale": 1.0, "global_step": 2005, "grad_norm": 2.2765492258820026, "learning_rate": 5.918359997398019e-07, "loss": 0.4623, "step": 2005 }, { "ETA": 2.11, "epoch": 0.6451197941791285, "fp16_scale": 1.0, "global_step": 2006, "grad_norm": 1.8058215948622816, "learning_rate": 5.908849820173343e-07, "loss": 0.3361, "step": 2006 }, { "ETA": 2.11, "epoch": 0.6454413892908828, "fp16_scale": 1.0, "global_step": 2007, "grad_norm": 1.9983789993118626, "learning_rate": 5.899344084866243e-07, "loss": 0.4584, "step": 2007 }, { "ETA": 2.1, "epoch": 0.6457629844026371, "fp16_scale": 1.0, "global_step": 2008, "grad_norm": 1.9215556764900221, "learning_rate": 5.88984280179745e-07, "loss": 0.4054, "step": 2008 }, { "ETA": 2.1, "epoch": 0.6460845795143914, "fp16_scale": 1.0, "global_step": 2009, "grad_norm": 2.00279358717218, "learning_rate": 5.880345981282876e-07, "loss": 0.4736, "step": 2009 }, { "ETA": 2.1, "epoch": 0.6464061746261457, "fp16_scale": 1.0, "global_step": 2010, "grad_norm": 2.0955002318335554, "learning_rate": 5.87085363363358e-07, "loss": 0.4548, "step": 2010 }, { "ETA": 2.1, "epoch": 0.6467277697379, "fp16_scale": 1.0, "global_step": 2011, "grad_norm": 2.073198212831422, "learning_rate": 5.861365769155759e-07, "loss": 0.3939, "step": 2011 }, { "ETA": 2.1, "epoch": 0.6470493648496543, "fp16_scale": 1.0, "global_step": 2012, "grad_norm": 2.135562977334264, "learning_rate": 5.851882398150756e-07, "loss": 0.3873, "step": 2012 }, { "ETA": 2.09, "epoch": 0.6473709599614086, "fp16_scale": 1.0, "global_step": 2013, "grad_norm": 1.6888098628966262, "learning_rate": 5.842403530915024e-07, "loss": 0.3549, "step": 2013 }, { "ETA": 2.09, "epoch": 0.6476925550731629, "fp16_scale": 1.0, "global_step": 2014, "grad_norm": 1.9980347065391566, "learning_rate": 5.832929177740133e-07, "loss": 0.4361, "step": 2014 }, { "ETA": 2.09, "epoch": 0.6480141501849171, "fp16_scale": 1.0, "global_step": 2015, "grad_norm": 1.8838140797820973, "learning_rate": 5.823459348912747e-07, "loss": 0.4179, "step": 2015 }, { "ETA": 2.09, "epoch": 0.6483357452966715, "fp16_scale": 1.0, "global_step": 2016, "grad_norm": 1.9322911975799548, "learning_rate": 5.813994054714624e-07, "loss": 0.3777, "step": 2016 }, { "ETA": 2.09, "epoch": 0.6486573404084258, "fp16_scale": 1.0, "global_step": 2017, "grad_norm": 1.9250320413518434, "learning_rate": 5.80453330542259e-07, "loss": 0.427, "step": 2017 }, { "ETA": 2.09, "epoch": 0.64897893552018, "fp16_scale": 1.0, "global_step": 2018, "grad_norm": 2.38827320000801, "learning_rate": 5.795077111308539e-07, "loss": 0.4415, "step": 2018 }, { "ETA": 2.08, "epoch": 0.6493005306319344, "fp16_scale": 1.0, "global_step": 2019, "grad_norm": 2.0461024210639307, "learning_rate": 5.785625482639425e-07, "loss": 0.4369, "step": 2019 }, { "ETA": 2.08, "epoch": 0.6496221257436887, "fp16_scale": 1.0, "global_step": 2020, "grad_norm": 2.156475214040958, "learning_rate": 5.776178429677238e-07, "loss": 0.4359, "step": 2020 }, { "ETA": 2.08, "epoch": 0.649943720855443, "fp16_scale": 1.0, "global_step": 2021, "grad_norm": 2.1010704366490227, "learning_rate": 5.76673596267901e-07, "loss": 0.4928, "step": 2021 }, { "ETA": 2.08, "epoch": 0.6502653159671973, "fp16_scale": 1.0, "global_step": 2022, "grad_norm": 1.8865559086622918, "learning_rate": 5.757298091896783e-07, "loss": 0.444, "step": 2022 }, { "ETA": 2.08, "epoch": 0.6505869110789516, "fp16_scale": 1.0, "global_step": 2023, "grad_norm": 2.018950994626792, "learning_rate": 5.747864827577608e-07, "loss": 0.3737, "step": 2023 }, { "ETA": 2.07, "epoch": 0.650908506190706, "fp16_scale": 1.0, "global_step": 2024, "grad_norm": 1.9639450586195586, "learning_rate": 5.738436179963544e-07, "loss": 0.3379, "step": 2024 }, { "ETA": 2.07, "epoch": 0.6512301013024602, "fp16_scale": 1.0, "global_step": 2025, "grad_norm": 2.0222112000629164, "learning_rate": 5.729012159291633e-07, "loss": 0.4636, "step": 2025 }, { "ETA": 2.07, "epoch": 0.6515516964142145, "fp16_scale": 1.0, "global_step": 2026, "grad_norm": 1.9355969546007485, "learning_rate": 5.719592775793897e-07, "loss": 0.463, "step": 2026 }, { "ETA": 2.07, "epoch": 0.6518732915259688, "fp16_scale": 1.0, "global_step": 2027, "grad_norm": 2.111716710658819, "learning_rate": 5.710178039697313e-07, "loss": 0.3557, "step": 2027 }, { "ETA": 2.07, "epoch": 0.6521948866377231, "fp16_scale": 1.0, "global_step": 2028, "grad_norm": 2.202734879548525, "learning_rate": 5.700767961223818e-07, "loss": 0.4938, "step": 2028 }, { "ETA": 2.07, "epoch": 0.6525164817494774, "fp16_scale": 1.0, "global_step": 2029, "grad_norm": 1.8289846529993485, "learning_rate": 5.691362550590296e-07, "loss": 0.415, "step": 2029 }, { "ETA": 2.06, "epoch": 0.6528380768612317, "fp16_scale": 1.0, "global_step": 2030, "grad_norm": 1.8551396366004673, "learning_rate": 5.681961818008558e-07, "loss": 0.4116, "step": 2030 }, { "ETA": 2.06, "epoch": 0.653159671972986, "fp16_scale": 1.0, "global_step": 2031, "grad_norm": 1.9685686893056107, "learning_rate": 5.672565773685343e-07, "loss": 0.4479, "step": 2031 }, { "ETA": 2.06, "epoch": 0.6534812670847403, "fp16_scale": 1.0, "global_step": 2032, "grad_norm": 1.8184331124615274, "learning_rate": 5.663174427822284e-07, "loss": 0.3493, "step": 2032 }, { "ETA": 2.06, "epoch": 0.6538028621964946, "fp16_scale": 1.0, "global_step": 2033, "grad_norm": 1.92902744488878, "learning_rate": 5.653787790615934e-07, "loss": 0.4476, "step": 2033 }, { "ETA": 2.06, "epoch": 0.6541244573082489, "fp16_scale": 1.0, "global_step": 2034, "grad_norm": 1.7453219695542925, "learning_rate": 5.644405872257716e-07, "loss": 0.4327, "step": 2034 }, { "ETA": 2.05, "epoch": 0.6544460524200032, "fp16_scale": 1.0, "global_step": 2035, "grad_norm": 2.0565556645252623, "learning_rate": 5.635028682933928e-07, "loss": 0.4334, "step": 2035 }, { "ETA": 2.05, "epoch": 0.6547676475317575, "fp16_scale": 1.0, "global_step": 2036, "grad_norm": 1.8334976412186053, "learning_rate": 5.62565623282576e-07, "loss": 0.3892, "step": 2036 }, { "ETA": 2.05, "epoch": 0.6550892426435119, "fp16_scale": 1.0, "global_step": 2037, "grad_norm": 2.061393856117499, "learning_rate": 5.616288532109224e-07, "loss": 0.4146, "step": 2037 }, { "ETA": 2.05, "epoch": 0.6554108377552661, "fp16_scale": 1.0, "global_step": 2038, "grad_norm": 1.9852001975556002, "learning_rate": 5.606925590955198e-07, "loss": 0.4019, "step": 2038 }, { "ETA": 2.05, "epoch": 0.6557324328670204, "fp16_scale": 1.0, "global_step": 2039, "grad_norm": 2.176443428455659, "learning_rate": 5.597567419529381e-07, "loss": 0.4754, "step": 2039 }, { "ETA": 2.05, "epoch": 0.6560540279787748, "fp16_scale": 1.0, "global_step": 2040, "grad_norm": 1.855250357011581, "learning_rate": 5.58821402799229e-07, "loss": 0.4311, "step": 2040 }, { "ETA": 2.04, "epoch": 0.656375623090529, "fp16_scale": 1.0, "global_step": 2041, "grad_norm": 2.314909571286062, "learning_rate": 5.578865426499265e-07, "loss": 0.4096, "step": 2041 }, { "ETA": 2.04, "epoch": 0.6566972182022833, "fp16_scale": 1.0, "global_step": 2042, "grad_norm": 2.1433437129678943, "learning_rate": 5.569521625200435e-07, "loss": 0.4667, "step": 2042 }, { "ETA": 2.04, "epoch": 0.6570188133140377, "fp16_scale": 1.0, "global_step": 2043, "grad_norm": 1.5629412526012503, "learning_rate": 5.560182634240729e-07, "loss": 0.4074, "step": 2043 }, { "ETA": 2.04, "epoch": 0.6573404084257919, "fp16_scale": 1.0, "global_step": 2044, "grad_norm": 2.1051220122344114, "learning_rate": 5.550848463759834e-07, "loss": 0.4296, "step": 2044 }, { "ETA": 2.04, "epoch": 0.6576620035375462, "fp16_scale": 1.0, "global_step": 2045, "grad_norm": 2.0755242456635004, "learning_rate": 5.541519123892224e-07, "loss": 0.4052, "step": 2045 }, { "ETA": 2.03, "epoch": 0.6579835986493006, "fp16_scale": 1.0, "global_step": 2046, "grad_norm": 2.0063920244386697, "learning_rate": 5.532194624767111e-07, "loss": 0.4453, "step": 2046 }, { "ETA": 2.03, "epoch": 0.6583051937610548, "fp16_scale": 1.0, "global_step": 2047, "grad_norm": 1.9310314752786921, "learning_rate": 5.522874976508463e-07, "loss": 0.422, "step": 2047 }, { "ETA": 2.03, "epoch": 0.6586267888728091, "fp16_scale": 1.0, "global_step": 2048, "grad_norm": 1.8147845689337565, "learning_rate": 5.513560189234978e-07, "loss": 0.5086, "step": 2048 }, { "ETA": 2.03, "epoch": 0.6589483839845635, "fp16_scale": 1.0, "global_step": 2049, "grad_norm": 1.9326449795983494, "learning_rate": 5.504250273060072e-07, "loss": 0.3729, "step": 2049 }, { "ETA": 2.03, "epoch": 0.6592699790963177, "fp16_scale": 1.0, "global_step": 2050, "grad_norm": 1.839187483126056, "learning_rate": 5.49494523809188e-07, "loss": 0.4142, "step": 2050 }, { "ETA": 2.02, "epoch": 0.659591574208072, "fp16_scale": 1.0, "global_step": 2051, "grad_norm": 2.0458852111091383, "learning_rate": 5.485645094433227e-07, "loss": 0.4033, "step": 2051 }, { "ETA": 2.02, "epoch": 0.6599131693198264, "fp16_scale": 1.0, "global_step": 2052, "grad_norm": 1.982259199461479, "learning_rate": 5.476349852181634e-07, "loss": 0.4611, "step": 2052 }, { "ETA": 2.02, "epoch": 0.6602347644315807, "fp16_scale": 1.0, "global_step": 2053, "grad_norm": 1.9886670061567648, "learning_rate": 5.467059521429309e-07, "loss": 0.425, "step": 2053 }, { "ETA": 2.02, "epoch": 0.6605563595433349, "fp16_scale": 1.0, "global_step": 2054, "grad_norm": 2.0980190758463624, "learning_rate": 5.457774112263105e-07, "loss": 0.4097, "step": 2054 }, { "ETA": 2.02, "epoch": 0.6608779546550893, "fp16_scale": 1.0, "global_step": 2055, "grad_norm": 1.7706319961454073, "learning_rate": 5.448493634764554e-07, "loss": 0.3716, "step": 2055 }, { "ETA": 2.01, "epoch": 0.6611995497668436, "fp16_scale": 1.0, "global_step": 2056, "grad_norm": 1.970227029214789, "learning_rate": 5.439218099009822e-07, "loss": 0.3874, "step": 2056 }, { "ETA": 2.01, "epoch": 0.6615211448785978, "fp16_scale": 1.0, "global_step": 2057, "grad_norm": 2.433884192071341, "learning_rate": 5.429947515069699e-07, "loss": 0.411, "step": 2057 }, { "ETA": 2.01, "epoch": 0.6618427399903521, "fp16_scale": 1.0, "global_step": 2058, "grad_norm": 2.353209000193179, "learning_rate": 5.42068189300963e-07, "loss": 0.462, "step": 2058 }, { "ETA": 2.01, "epoch": 0.6621643351021065, "fp16_scale": 1.0, "global_step": 2059, "grad_norm": 2.0345860122223227, "learning_rate": 5.411421242889642e-07, "loss": 0.4137, "step": 2059 }, { "ETA": 2.01, "epoch": 0.6624859302138607, "fp16_scale": 1.0, "global_step": 2060, "grad_norm": 1.957565225211963, "learning_rate": 5.402165574764383e-07, "loss": 0.4172, "step": 2060 }, { "ETA": 2.0, "epoch": 0.662807525325615, "fp16_scale": 1.0, "global_step": 2061, "grad_norm": 2.180623010242645, "learning_rate": 5.392914898683077e-07, "loss": 0.4962, "step": 2061 }, { "ETA": 2.0, "epoch": 0.6631291204373694, "fp16_scale": 1.0, "global_step": 2062, "grad_norm": 1.8248603953685665, "learning_rate": 5.38366922468954e-07, "loss": 0.4274, "step": 2062 }, { "ETA": 2.0, "epoch": 0.6634507155491236, "fp16_scale": 1.0, "global_step": 2063, "grad_norm": 1.8267433758122193, "learning_rate": 5.374428562822151e-07, "loss": 0.4159, "step": 2063 }, { "ETA": 2.0, "epoch": 0.6637723106608779, "fp16_scale": 1.0, "global_step": 2064, "grad_norm": 1.923510432210067, "learning_rate": 5.365192923113846e-07, "loss": 0.4498, "step": 2064 }, { "ETA": 2.0, "epoch": 0.6640939057726323, "fp16_scale": 1.0, "global_step": 2065, "grad_norm": 1.9820096390596678, "learning_rate": 5.355962315592118e-07, "loss": 0.4484, "step": 2065 }, { "ETA": 1.99, "epoch": 0.6644155008843866, "fp16_scale": 1.0, "global_step": 2066, "grad_norm": 1.954182301373188, "learning_rate": 5.34673675027898e-07, "loss": 0.4506, "step": 2066 }, { "ETA": 1.99, "epoch": 0.6647370959961408, "fp16_scale": 1.0, "global_step": 2067, "grad_norm": 2.10881723708717, "learning_rate": 5.337516237190989e-07, "loss": 0.3476, "step": 2067 }, { "ETA": 1.99, "epoch": 0.6650586911078952, "fp16_scale": 1.0, "global_step": 2068, "grad_norm": 2.0858184276372653, "learning_rate": 5.328300786339199e-07, "loss": 0.4174, "step": 2068 }, { "ETA": 1.99, "epoch": 0.6653802862196495, "fp16_scale": 1.0, "global_step": 2069, "grad_norm": 1.725948847457649, "learning_rate": 5.319090407729179e-07, "loss": 0.419, "step": 2069 }, { "ETA": 1.99, "epoch": 0.6657018813314037, "fp16_scale": 1.0, "global_step": 2070, "grad_norm": 1.9176502634160255, "learning_rate": 5.309885111360993e-07, "loss": 0.4788, "step": 2070 }, { "ETA": 1.98, "epoch": 0.6660234764431581, "fp16_scale": 1.0, "global_step": 2071, "grad_norm": 1.8852044620717578, "learning_rate": 5.300684907229172e-07, "loss": 0.3537, "step": 2071 }, { "ETA": 1.98, "epoch": 0.6663450715549124, "fp16_scale": 1.0, "global_step": 2072, "grad_norm": 1.9543436571900248, "learning_rate": 5.291489805322738e-07, "loss": 0.3813, "step": 2072 }, { "ETA": 1.98, "epoch": 0.6666666666666666, "fp16_scale": 1.0, "global_step": 2073, "grad_norm": 2.141372059399777, "learning_rate": 5.282299815625153e-07, "loss": 0.4728, "step": 2073 }, { "ETA": 1.98, "epoch": 0.666988261778421, "fp16_scale": 1.0, "global_step": 2074, "grad_norm": 2.0654724895865955, "learning_rate": 5.273114948114346e-07, "loss": 0.4454, "step": 2074 }, { "ETA": 1.98, "epoch": 0.6673098568901753, "fp16_scale": 1.0, "global_step": 2075, "grad_norm": 1.9880234369103011, "learning_rate": 5.26393521276268e-07, "loss": 0.4283, "step": 2075 }, { "ETA": 1.98, "epoch": 0.6676314520019295, "fp16_scale": 1.0, "global_step": 2076, "grad_norm": 1.9457472789077968, "learning_rate": 5.254760619536935e-07, "loss": 0.4031, "step": 2076 }, { "ETA": 1.97, "epoch": 0.6679530471136839, "fp16_scale": 1.0, "global_step": 2077, "grad_norm": 1.9498255842743843, "learning_rate": 5.245591178398323e-07, "loss": 0.4649, "step": 2077 }, { "ETA": 1.97, "epoch": 0.6682746422254382, "fp16_scale": 1.0, "global_step": 2078, "grad_norm": 1.9434489908906305, "learning_rate": 5.23642689930245e-07, "loss": 0.3411, "step": 2078 }, { "ETA": 1.97, "epoch": 0.6685962373371924, "fp16_scale": 1.0, "global_step": 2079, "grad_norm": 2.051200817295661, "learning_rate": 5.227267792199332e-07, "loss": 0.3771, "step": 2079 }, { "ETA": 1.97, "epoch": 0.6689178324489468, "fp16_scale": 1.0, "global_step": 2080, "grad_norm": 1.8532587279366703, "learning_rate": 5.218113867033349e-07, "loss": 0.3315, "step": 2080 }, { "ETA": 1.96, "epoch": 0.6692394275607011, "fp16_scale": 1.0, "global_step": 2081, "grad_norm": 2.0683333349623556, "learning_rate": 5.208965133743271e-07, "loss": 0.3896, "step": 2081 }, { "ETA": 1.96, "epoch": 0.6695610226724554, "fp16_scale": 1.0, "global_step": 2082, "grad_norm": 1.9150254189977027, "learning_rate": 5.199821602262231e-07, "loss": 0.491, "step": 2082 }, { "ETA": 1.96, "epoch": 0.6698826177842097, "fp16_scale": 1.0, "global_step": 2083, "grad_norm": 1.8251708279077623, "learning_rate": 5.190683282517701e-07, "loss": 0.369, "step": 2083 }, { "ETA": 1.96, "epoch": 0.670204212895964, "fp16_scale": 1.0, "global_step": 2084, "grad_norm": 2.280440590990741, "learning_rate": 5.18155018443151e-07, "loss": 0.3774, "step": 2084 }, { "ETA": 1.96, "epoch": 0.6705258080077183, "fp16_scale": 1.0, "global_step": 2085, "grad_norm": 2.0733753399267445, "learning_rate": 5.172422317919804e-07, "loss": 0.4403, "step": 2085 }, { "ETA": 1.95, "epoch": 0.6708474031194726, "fp16_scale": 1.0, "global_step": 2086, "grad_norm": 2.302744266658567, "learning_rate": 5.163299692893059e-07, "loss": 0.4789, "step": 2086 }, { "ETA": 1.95, "epoch": 0.6711689982312269, "fp16_scale": 1.0, "global_step": 2087, "grad_norm": 2.106795394185773, "learning_rate": 5.15418231925606e-07, "loss": 0.4034, "step": 2087 }, { "ETA": 1.95, "epoch": 0.6714905933429812, "fp16_scale": 1.0, "global_step": 2088, "grad_norm": 2.233870228931919, "learning_rate": 5.14507020690788e-07, "loss": 0.4579, "step": 2088 }, { "ETA": 1.95, "epoch": 0.6718121884547354, "fp16_scale": 1.0, "global_step": 2089, "grad_norm": 2.0075117201584884, "learning_rate": 5.135963365741891e-07, "loss": 0.4422, "step": 2089 }, { "ETA": 1.95, "epoch": 0.6721337835664898, "fp16_scale": 1.0, "global_step": 2090, "grad_norm": 2.077780167824397, "learning_rate": 5.126861805645734e-07, "loss": 0.4361, "step": 2090 }, { "ETA": 1.95, "epoch": 0.6724553786782441, "fp16_scale": 1.0, "global_step": 2091, "grad_norm": 1.6934528056624099, "learning_rate": 5.11776553650132e-07, "loss": 0.3995, "step": 2091 }, { "ETA": 1.94, "epoch": 0.6727769737899983, "fp16_scale": 1.0, "global_step": 2092, "grad_norm": 2.0314987863788754, "learning_rate": 5.108674568184821e-07, "loss": 0.4113, "step": 2092 }, { "ETA": 1.94, "epoch": 0.6730985689017527, "fp16_scale": 1.0, "global_step": 2093, "grad_norm": 1.9359311018879675, "learning_rate": 5.099588910566637e-07, "loss": 0.4002, "step": 2093 }, { "ETA": 1.94, "epoch": 0.673420164013507, "fp16_scale": 1.0, "global_step": 2094, "grad_norm": 1.8677083740014844, "learning_rate": 5.090508573511423e-07, "loss": 0.4368, "step": 2094 }, { "ETA": 1.94, "epoch": 0.6737417591252612, "fp16_scale": 1.0, "global_step": 2095, "grad_norm": 1.9545734304206386, "learning_rate": 5.081433566878038e-07, "loss": 0.4803, "step": 2095 }, { "ETA": 1.94, "epoch": 0.6740633542370156, "fp16_scale": 1.0, "global_step": 2096, "grad_norm": 2.0827276601919973, "learning_rate": 5.072363900519566e-07, "loss": 0.4697, "step": 2096 }, { "ETA": 1.93, "epoch": 0.6743849493487699, "fp16_scale": 1.0, "global_step": 2097, "grad_norm": 1.8459348603587467, "learning_rate": 5.063299584283294e-07, "loss": 0.4974, "step": 2097 }, { "ETA": 1.93, "epoch": 0.6747065444605242, "fp16_scale": 1.0, "global_step": 2098, "grad_norm": 1.9178692432374298, "learning_rate": 5.054240628010686e-07, "loss": 0.4044, "step": 2098 }, { "ETA": 1.93, "epoch": 0.6750281395722785, "fp16_scale": 1.0, "global_step": 2099, "grad_norm": 1.9316722920720346, "learning_rate": 5.045187041537404e-07, "loss": 0.4077, "step": 2099 }, { "ETA": 1.93, "epoch": 0.6753497346840328, "fp16_scale": 1.0, "global_step": 2100, "grad_norm": 2.189453020861492, "learning_rate": 5.036138834693267e-07, "loss": 0.3788, "step": 2100 }, { "ETA": 1.93, "epoch": 0.6756713297957871, "fp16_scale": 1.0, "global_step": 2101, "grad_norm": 1.8196764666790626, "learning_rate": 5.02709601730226e-07, "loss": 0.4728, "step": 2101 }, { "ETA": 1.92, "epoch": 0.6759929249075414, "fp16_scale": 1.0, "global_step": 2102, "grad_norm": 1.9021321042207269, "learning_rate": 5.018058599182507e-07, "loss": 0.4484, "step": 2102 }, { "ETA": 1.92, "epoch": 0.6763145200192957, "fp16_scale": 1.0, "global_step": 2103, "grad_norm": 1.9953437493352695, "learning_rate": 5.009026590146293e-07, "loss": 0.3999, "step": 2103 }, { "ETA": 1.92, "epoch": 0.67663611513105, "fp16_scale": 1.0, "global_step": 2104, "grad_norm": 2.096146733100405, "learning_rate": 5.000000000000002e-07, "loss": 0.4082, "step": 2104 }, { "ETA": 1.92, "epoch": 0.6769577102428043, "fp16_scale": 1.0, "global_step": 2105, "grad_norm": 2.115570096537946, "learning_rate": 4.990978838544147e-07, "loss": 0.3856, "step": 2105 }, { "ETA": 1.92, "epoch": 0.6772793053545586, "fp16_scale": 1.0, "global_step": 2106, "grad_norm": 1.8089565442410478, "learning_rate": 4.981963115573352e-07, "loss": 0.4085, "step": 2106 }, { "ETA": 1.92, "epoch": 0.6776009004663129, "fp16_scale": 1.0, "global_step": 2107, "grad_norm": 1.98835696095386, "learning_rate": 4.972952840876325e-07, "loss": 0.4526, "step": 2107 }, { "ETA": 1.91, "epoch": 0.6779224955780672, "fp16_scale": 1.0, "global_step": 2108, "grad_norm": 1.7542113635635128, "learning_rate": 4.963948024235866e-07, "loss": 0.441, "step": 2108 }, { "ETA": 1.91, "epoch": 0.6782440906898215, "fp16_scale": 1.0, "global_step": 2109, "grad_norm": 2.114280110588985, "learning_rate": 4.954948675428853e-07, "loss": 0.4141, "step": 2109 }, { "ETA": 1.91, "epoch": 0.6785656858015758, "fp16_scale": 1.0, "global_step": 2110, "grad_norm": 2.1118367919477357, "learning_rate": 4.945954804226214e-07, "loss": 0.4093, "step": 2110 }, { "ETA": 1.91, "epoch": 0.6788872809133302, "fp16_scale": 1.0, "global_step": 2111, "grad_norm": 1.9635385154854137, "learning_rate": 4.936966420392944e-07, "loss": 0.4003, "step": 2111 }, { "ETA": 1.91, "epoch": 0.6792088760250844, "fp16_scale": 1.0, "global_step": 2112, "grad_norm": 1.977750108398266, "learning_rate": 4.927983533688067e-07, "loss": 0.4322, "step": 2112 }, { "ETA": 1.9, "epoch": 0.6795304711368387, "fp16_scale": 1.0, "global_step": 2113, "grad_norm": 1.8893135953840186, "learning_rate": 4.919006153864648e-07, "loss": 0.4925, "step": 2113 }, { "ETA": 1.9, "epoch": 0.6798520662485931, "fp16_scale": 1.0, "global_step": 2114, "grad_norm": 2.14033345862397, "learning_rate": 4.910034290669776e-07, "loss": 0.3787, "step": 2114 }, { "ETA": 1.9, "epoch": 0.6801736613603473, "fp16_scale": 1.0, "global_step": 2115, "grad_norm": 1.7925381188694647, "learning_rate": 4.901067953844537e-07, "loss": 0.4401, "step": 2115 }, { "ETA": 1.9, "epoch": 0.6804952564721016, "fp16_scale": 1.0, "global_step": 2116, "grad_norm": 2.0063586648085887, "learning_rate": 4.892107153124029e-07, "loss": 0.453, "step": 2116 }, { "ETA": 1.9, "epoch": 0.680816851583856, "fp16_scale": 1.0, "global_step": 2117, "grad_norm": 2.206872828524902, "learning_rate": 4.883151898237329e-07, "loss": 0.3632, "step": 2117 }, { "ETA": 1.89, "epoch": 0.6811384466956102, "fp16_scale": 1.0, "global_step": 2118, "grad_norm": 2.048596899499758, "learning_rate": 4.874202198907502e-07, "loss": 0.4086, "step": 2118 }, { "ETA": 1.89, "epoch": 0.6814600418073645, "fp16_scale": 1.0, "global_step": 2119, "grad_norm": 1.725224121911189, "learning_rate": 4.865258064851578e-07, "loss": 0.4006, "step": 2119 }, { "ETA": 1.89, "epoch": 0.6817816369191189, "fp16_scale": 1.0, "global_step": 2120, "grad_norm": 2.1542462909235756, "learning_rate": 4.856319505780547e-07, "loss": 0.4415, "step": 2120 }, { "ETA": 1.89, "epoch": 0.6821032320308731, "fp16_scale": 1.0, "global_step": 2121, "grad_norm": 2.080492885117813, "learning_rate": 4.847386531399339e-07, "loss": 0.408, "step": 2121 }, { "ETA": 1.89, "epoch": 0.6824248271426274, "fp16_scale": 1.0, "global_step": 2122, "grad_norm": 2.2094173637517773, "learning_rate": 4.838459151406822e-07, "loss": 0.4346, "step": 2122 }, { "ETA": 1.88, "epoch": 0.6827464222543818, "fp16_scale": 1.0, "global_step": 2123, "grad_norm": 1.9387074487488687, "learning_rate": 4.829537375495798e-07, "loss": 0.435, "step": 2123 }, { "ETA": 1.88, "epoch": 0.683068017366136, "fp16_scale": 1.0, "global_step": 2124, "grad_norm": 2.0396843533535582, "learning_rate": 4.82062121335297e-07, "loss": 0.3722, "step": 2124 }, { "ETA": 1.88, "epoch": 0.6833896124778903, "fp16_scale": 1.0, "global_step": 2125, "grad_norm": 2.233077561115142, "learning_rate": 4.811710674658968e-07, "loss": 0.3987, "step": 2125 }, { "ETA": 1.88, "epoch": 0.6837112075896447, "fp16_scale": 1.0, "global_step": 2126, "grad_norm": 2.0868138718712257, "learning_rate": 4.802805769088298e-07, "loss": 0.4707, "step": 2126 }, { "ETA": 1.88, "epoch": 0.684032802701399, "fp16_scale": 1.0, "global_step": 2127, "grad_norm": 1.8915169582337128, "learning_rate": 4.793906506309347e-07, "loss": 0.4742, "step": 2127 }, { "ETA": 1.87, "epoch": 0.6843543978131532, "fp16_scale": 1.0, "global_step": 2128, "grad_norm": 2.06189918024411, "learning_rate": 4.785012895984397e-07, "loss": 0.3803, "step": 2128 }, { "ETA": 1.87, "epoch": 0.6846759929249076, "fp16_scale": 1.0, "global_step": 2129, "grad_norm": 1.8873364369445147, "learning_rate": 4.776124947769566e-07, "loss": 0.397, "step": 2129 }, { "ETA": 1.87, "epoch": 0.6849975880366619, "fp16_scale": 1.0, "global_step": 2130, "grad_norm": 2.0283057529756467, "learning_rate": 4.767242671314846e-07, "loss": 0.4939, "step": 2130 }, { "ETA": 1.87, "epoch": 0.6853191831484161, "fp16_scale": 1.0, "global_step": 2131, "grad_norm": 2.104418369293907, "learning_rate": 4.758366076264061e-07, "loss": 0.2901, "step": 2131 }, { "ETA": 1.87, "epoch": 0.6856407782601704, "fp16_scale": 1.0, "global_step": 2132, "grad_norm": 1.976994436965051, "learning_rate": 4.7494951722548726e-07, "loss": 0.4407, "step": 2132 }, { "ETA": 1.86, "epoch": 0.6859623733719248, "fp16_scale": 1.0, "global_step": 2133, "grad_norm": 2.227745365332368, "learning_rate": 4.7406299689187557e-07, "loss": 0.5125, "step": 2133 }, { "ETA": 1.86, "epoch": 0.686283968483679, "fp16_scale": 1.0, "global_step": 2134, "grad_norm": 1.8143522659584321, "learning_rate": 4.731770475880994e-07, "loss": 0.4689, "step": 2134 }, { "ETA": 1.86, "epoch": 0.6866055635954333, "fp16_scale": 1.0, "global_step": 2135, "grad_norm": 1.7957129290625795, "learning_rate": 4.722916702760682e-07, "loss": 0.381, "step": 2135 }, { "ETA": 1.86, "epoch": 0.6869271587071877, "fp16_scale": 1.0, "global_step": 2136, "grad_norm": 2.008111581914058, "learning_rate": 4.714068659170698e-07, "loss": 0.5392, "step": 2136 }, { "ETA": 1.86, "epoch": 0.6872487538189419, "fp16_scale": 1.0, "global_step": 2137, "grad_norm": 1.9949153156117356, "learning_rate": 4.705226354717703e-07, "loss": 0.3615, "step": 2137 }, { "ETA": 1.86, "epoch": 0.6875703489306962, "fp16_scale": 1.0, "global_step": 2138, "grad_norm": 1.8513909420105987, "learning_rate": 4.6963897990021197e-07, "loss": 0.3473, "step": 2138 }, { "ETA": 1.85, "epoch": 0.6878919440424506, "fp16_scale": 1.0, "global_step": 2139, "grad_norm": 2.0767110857719704, "learning_rate": 4.687559001618131e-07, "loss": 0.4204, "step": 2139 }, { "ETA": 1.85, "epoch": 0.6882135391542048, "fp16_scale": 1.0, "global_step": 2140, "grad_norm": 2.1147501902316455, "learning_rate": 4.6787339721536724e-07, "loss": 0.4133, "step": 2140 }, { "ETA": 1.85, "epoch": 0.6885351342659591, "fp16_scale": 1.0, "global_step": 2141, "grad_norm": 1.9236443785166624, "learning_rate": 4.6699147201904143e-07, "loss": 0.4067, "step": 2141 }, { "ETA": 1.85, "epoch": 0.6888567293777135, "fp16_scale": 1.0, "global_step": 2142, "grad_norm": 2.0996085018750157, "learning_rate": 4.66110125530376e-07, "loss": 0.4468, "step": 2142 }, { "ETA": 1.85, "epoch": 0.6891783244894678, "fp16_scale": 1.0, "global_step": 2143, "grad_norm": 1.828121616899122, "learning_rate": 4.652293587062819e-07, "loss": 0.4072, "step": 2143 }, { "ETA": 1.84, "epoch": 0.689499919601222, "fp16_scale": 1.0, "global_step": 2144, "grad_norm": 1.8826247487337693, "learning_rate": 4.6434917250304076e-07, "loss": 0.4363, "step": 2144 }, { "ETA": 1.84, "epoch": 0.6898215147129764, "fp16_scale": 1.0, "global_step": 2145, "grad_norm": 2.1052871533519397, "learning_rate": 4.634695678763052e-07, "loss": 0.4686, "step": 2145 }, { "ETA": 1.84, "epoch": 0.6901431098247307, "fp16_scale": 1.0, "global_step": 2146, "grad_norm": 1.978894968730985, "learning_rate": 4.625905457810942e-07, "loss": 0.4116, "step": 2146 }, { "ETA": 1.84, "epoch": 0.6904647049364849, "fp16_scale": 1.0, "global_step": 2147, "grad_norm": 1.8457556993416007, "learning_rate": 4.617121071717971e-07, "loss": 0.3796, "step": 2147 }, { "ETA": 1.84, "epoch": 0.6907863000482393, "fp16_scale": 1.0, "global_step": 2148, "grad_norm": 1.775233947226462, "learning_rate": 4.608342530021669e-07, "loss": 0.428, "step": 2148 }, { "ETA": 1.83, "epoch": 0.6911078951599936, "fp16_scale": 1.0, "global_step": 2149, "grad_norm": 2.4059448105074632, "learning_rate": 4.599569842253244e-07, "loss": 0.3959, "step": 2149 }, { "ETA": 1.83, "epoch": 0.6914294902717478, "fp16_scale": 1.0, "global_step": 2150, "grad_norm": 1.9336965225715175, "learning_rate": 4.590803017937529e-07, "loss": 0.4088, "step": 2150 }, { "ETA": 1.83, "epoch": 0.6917510853835022, "fp16_scale": 1.0, "global_step": 2151, "grad_norm": 1.9766746933849264, "learning_rate": 4.582042066592998e-07, "loss": 0.4478, "step": 2151 }, { "ETA": 1.83, "epoch": 0.6920726804952565, "fp16_scale": 1.0, "global_step": 2152, "grad_norm": 1.7331008700279942, "learning_rate": 4.5732869977317535e-07, "loss": 0.4382, "step": 2152 }, { "ETA": 1.83, "epoch": 0.6923942756070107, "fp16_scale": 1.0, "global_step": 2153, "grad_norm": 2.111208298261346, "learning_rate": 4.5645378208595055e-07, "loss": 0.4339, "step": 2153 }, { "ETA": 1.82, "epoch": 0.6927158707187651, "fp16_scale": 1.0, "global_step": 2154, "grad_norm": 1.9885480809010154, "learning_rate": 4.555794545475573e-07, "loss": 0.446, "step": 2154 }, { "ETA": 1.82, "epoch": 0.6930374658305194, "fp16_scale": 1.0, "global_step": 2155, "grad_norm": 2.0731087672033146, "learning_rate": 4.547057181072861e-07, "loss": 0.4682, "step": 2155 }, { "ETA": 1.82, "epoch": 0.6933590609422737, "fp16_scale": 1.0, "global_step": 2156, "grad_norm": 1.967125013570481, "learning_rate": 4.5383257371378524e-07, "loss": 0.4705, "step": 2156 }, { "ETA": 1.82, "epoch": 0.693680656054028, "fp16_scale": 1.0, "global_step": 2157, "grad_norm": 2.052917753719661, "learning_rate": 4.5296002231506145e-07, "loss": 0.4523, "step": 2157 }, { "ETA": 1.82, "epoch": 0.6940022511657823, "fp16_scale": 1.0, "global_step": 2158, "grad_norm": 2.205481787919023, "learning_rate": 4.5208806485847693e-07, "loss": 0.3519, "step": 2158 }, { "ETA": 1.81, "epoch": 0.6943238462775366, "fp16_scale": 1.0, "global_step": 2159, "grad_norm": 1.9347264421264965, "learning_rate": 4.512167022907494e-07, "loss": 0.5172, "step": 2159 }, { "ETA": 1.81, "epoch": 0.6946454413892909, "fp16_scale": 1.0, "global_step": 2160, "grad_norm": 1.9246825347822658, "learning_rate": 4.503459355579501e-07, "loss": 0.4234, "step": 2160 }, { "ETA": 1.81, "epoch": 0.6949670365010452, "fp16_scale": 1.0, "global_step": 2161, "grad_norm": 1.8277123255326062, "learning_rate": 4.4947576560550326e-07, "loss": 0.4084, "step": 2161 }, { "ETA": 1.81, "epoch": 0.6952886316127995, "fp16_scale": 1.0, "global_step": 2162, "grad_norm": 2.0570802166174214, "learning_rate": 4.4860619337818586e-07, "loss": 0.485, "step": 2162 }, { "ETA": 1.81, "epoch": 0.6956102267245537, "fp16_scale": 1.0, "global_step": 2163, "grad_norm": 1.9156853110868317, "learning_rate": 4.477372198201256e-07, "loss": 0.4717, "step": 2163 }, { "ETA": 1.81, "epoch": 0.6959318218363081, "fp16_scale": 1.0, "global_step": 2164, "grad_norm": 1.9396859746301647, "learning_rate": 4.4686884587480056e-07, "loss": 0.4048, "step": 2164 }, { "ETA": 1.8, "epoch": 0.6962534169480624, "fp16_scale": 1.0, "global_step": 2165, "grad_norm": 2.0950628522505985, "learning_rate": 4.460010724850367e-07, "loss": 0.3888, "step": 2165 }, { "ETA": 1.8, "epoch": 0.6965750120598166, "fp16_scale": 1.0, "global_step": 2166, "grad_norm": 2.257864856135109, "learning_rate": 4.451339005930094e-07, "loss": 0.3402, "step": 2166 }, { "ETA": 1.8, "epoch": 0.696896607171571, "fp16_scale": 1.0, "global_step": 2167, "grad_norm": 1.9499895335908553, "learning_rate": 4.4426733114023975e-07, "loss": 0.4476, "step": 2167 }, { "ETA": 1.8, "epoch": 0.6972182022833253, "fp16_scale": 1.0, "global_step": 2168, "grad_norm": 1.8549561588819414, "learning_rate": 4.4340136506759486e-07, "loss": 0.3655, "step": 2168 }, { "ETA": 1.79, "epoch": 0.6975397973950795, "fp16_scale": 1.0, "global_step": 2169, "grad_norm": 2.2544356108635646, "learning_rate": 4.425360033152875e-07, "loss": 0.3345, "step": 2169 }, { "ETA": 1.79, "epoch": 0.6978613925068339, "fp16_scale": 1.0, "global_step": 2170, "grad_norm": 1.9747679165142917, "learning_rate": 4.416712468228738e-07, "loss": 0.4603, "step": 2170 }, { "ETA": 1.79, "epoch": 0.6981829876185882, "fp16_scale": 1.0, "global_step": 2171, "grad_norm": 2.042816511574314, "learning_rate": 4.408070965292533e-07, "loss": 0.4746, "step": 2171 }, { "ETA": 1.79, "epoch": 0.6985045827303425, "fp16_scale": 1.0, "global_step": 2172, "grad_norm": 1.8521673086720718, "learning_rate": 4.399435533726664e-07, "loss": 0.3694, "step": 2172 }, { "ETA": 1.79, "epoch": 0.6988261778420968, "fp16_scale": 1.0, "global_step": 2173, "grad_norm": 2.0284223100857472, "learning_rate": 4.3908061829069456e-07, "loss": 0.4253, "step": 2173 }, { "ETA": 1.79, "epoch": 0.6991477729538511, "fp16_scale": 1.0, "global_step": 2174, "grad_norm": 2.1478253391524733, "learning_rate": 4.382182922202595e-07, "loss": 0.3783, "step": 2174 }, { "ETA": 1.78, "epoch": 0.6994693680656054, "fp16_scale": 1.0, "global_step": 2175, "grad_norm": 1.964934515334503, "learning_rate": 4.3735657609762157e-07, "loss": 0.4042, "step": 2175 }, { "ETA": 1.78, "epoch": 0.6997909631773597, "fp16_scale": 1.0, "global_step": 2176, "grad_norm": 1.9394408594447183, "learning_rate": 4.364954708583791e-07, "loss": 0.3743, "step": 2176 }, { "ETA": 1.78, "epoch": 0.700112558289114, "fp16_scale": 1.0, "global_step": 2177, "grad_norm": 2.2539404168304786, "learning_rate": 4.3563497743746615e-07, "loss": 0.43, "step": 2177 }, { "ETA": 1.78, "epoch": 0.7004341534008683, "fp16_scale": 1.0, "global_step": 2178, "grad_norm": 2.390806632951474, "learning_rate": 4.347750967691539e-07, "loss": 0.3694, "step": 2178 }, { "ETA": 1.77, "epoch": 0.7007557485126226, "fp16_scale": 1.0, "global_step": 2179, "grad_norm": 1.987087416778606, "learning_rate": 4.339158297870469e-07, "loss": 0.354, "step": 2179 }, { "ETA": 1.77, "epoch": 0.7010773436243769, "fp16_scale": 1.0, "global_step": 2180, "grad_norm": 2.0974630478121923, "learning_rate": 4.330571774240842e-07, "loss": 0.3789, "step": 2180 }, { "ETA": 1.77, "epoch": 0.7013989387361312, "fp16_scale": 1.0, "global_step": 2181, "grad_norm": 2.000184785878777, "learning_rate": 4.3219914061253793e-07, "loss": 0.3766, "step": 2181 }, { "ETA": 1.77, "epoch": 0.7017205338478855, "fp16_scale": 1.0, "global_step": 2182, "grad_norm": 2.1133007929286753, "learning_rate": 4.313417202840106e-07, "loss": 0.4462, "step": 2182 }, { "ETA": 1.77, "epoch": 0.7020421289596398, "fp16_scale": 1.0, "global_step": 2183, "grad_norm": 1.974731726211235, "learning_rate": 4.3048491736943683e-07, "loss": 0.4428, "step": 2183 }, { "ETA": 1.77, "epoch": 0.7023637240713941, "fp16_scale": 1.0, "global_step": 2184, "grad_norm": 1.9994266341684352, "learning_rate": 4.2962873279907963e-07, "loss": 0.4203, "step": 2184 }, { "ETA": 1.76, "epoch": 0.7026853191831485, "fp16_scale": 1.0, "global_step": 2185, "grad_norm": 2.1987989561751657, "learning_rate": 4.2877316750253077e-07, "loss": 0.3732, "step": 2185 }, { "ETA": 1.76, "epoch": 0.7030069142949027, "fp16_scale": 1.0, "global_step": 2186, "grad_norm": 1.9440227895993096, "learning_rate": 4.2791822240871134e-07, "loss": 0.3947, "step": 2186 }, { "ETA": 1.76, "epoch": 0.703328509406657, "fp16_scale": 1.0, "global_step": 2187, "grad_norm": 2.0467861264126075, "learning_rate": 4.270638984458668e-07, "loss": 0.3372, "step": 2187 }, { "ETA": 1.76, "epoch": 0.7036501045184114, "fp16_scale": 1.0, "global_step": 2188, "grad_norm": 1.771590413871306, "learning_rate": 4.2621019654156976e-07, "loss": 0.398, "step": 2188 }, { "ETA": 1.76, "epoch": 0.7039716996301656, "fp16_scale": 1.0, "global_step": 2189, "grad_norm": 2.05412758035908, "learning_rate": 4.253571176227168e-07, "loss": 0.3881, "step": 2189 }, { "ETA": 1.75, "epoch": 0.7042932947419199, "fp16_scale": 1.0, "global_step": 2190, "grad_norm": 1.9042821216063328, "learning_rate": 4.245046626155275e-07, "loss": 0.4662, "step": 2190 }, { "ETA": 1.75, "epoch": 0.7046148898536743, "fp16_scale": 1.0, "global_step": 2191, "grad_norm": 1.89030800830198, "learning_rate": 4.236528324455454e-07, "loss": 0.356, "step": 2191 }, { "ETA": 1.75, "epoch": 0.7049364849654285, "fp16_scale": 1.0, "global_step": 2192, "grad_norm": 1.9532946530918986, "learning_rate": 4.2280162803763487e-07, "loss": 0.4071, "step": 2192 }, { "ETA": 1.75, "epoch": 0.7052580800771828, "fp16_scale": 1.0, "global_step": 2193, "grad_norm": 2.2138783848840022, "learning_rate": 4.2195105031598123e-07, "loss": 0.4353, "step": 2193 }, { "ETA": 1.75, "epoch": 0.7055796751889372, "fp16_scale": 1.0, "global_step": 2194, "grad_norm": 1.7951242698976733, "learning_rate": 4.211011002040885e-07, "loss": 0.3116, "step": 2194 }, { "ETA": 1.74, "epoch": 0.7059012703006914, "fp16_scale": 1.0, "global_step": 2195, "grad_norm": 2.0072448973292905, "learning_rate": 4.2025177862478057e-07, "loss": 0.4307, "step": 2195 }, { "ETA": 1.74, "epoch": 0.7062228654124457, "fp16_scale": 1.0, "global_step": 2196, "grad_norm": 1.8684595639663752, "learning_rate": 4.194030865001974e-07, "loss": 0.3894, "step": 2196 }, { "ETA": 1.74, "epoch": 0.7065444605242001, "fp16_scale": 1.0, "global_step": 2197, "grad_norm": 1.829405941945004, "learning_rate": 4.185550247517969e-07, "loss": 0.3636, "step": 2197 }, { "ETA": 1.74, "epoch": 0.7068660556359543, "fp16_scale": 1.0, "global_step": 2198, "grad_norm": 2.1678571316880926, "learning_rate": 4.1770759430035217e-07, "loss": 0.345, "step": 2198 }, { "ETA": 1.74, "epoch": 0.7071876507477086, "fp16_scale": 1.0, "global_step": 2199, "grad_norm": 1.9344641864621686, "learning_rate": 4.1686079606595027e-07, "loss": 0.4396, "step": 2199 }, { "ETA": 1.73, "epoch": 0.707509245859463, "fp16_scale": 1.0, "global_step": 2200, "grad_norm": 1.950038444686929, "learning_rate": 4.1601463096799274e-07, "loss": 0.4394, "step": 2200 }, { "ETA": 1.74, "epoch": 0.7078308409712173, "fp16_scale": 1.0, "global_step": 2201, "grad_norm": 2.194075025199006, "learning_rate": 4.151690999251928e-07, "loss": 0.47, "step": 2201 }, { "ETA": 1.73, "epoch": 0.7081524360829715, "fp16_scale": 1.0, "global_step": 2202, "grad_norm": 2.0737549787241236, "learning_rate": 4.1432420385557577e-07, "loss": 0.3843, "step": 2202 }, { "ETA": 1.73, "epoch": 0.7084740311947259, "fp16_scale": 1.0, "global_step": 2203, "grad_norm": 1.922407145802047, "learning_rate": 4.1347994367647797e-07, "loss": 0.4362, "step": 2203 }, { "ETA": 1.73, "epoch": 0.7087956263064802, "fp16_scale": 1.0, "global_step": 2204, "grad_norm": 2.179140931747294, "learning_rate": 4.126363203045443e-07, "loss": 0.4519, "step": 2204 }, { "ETA": 1.73, "epoch": 0.7091172214182344, "fp16_scale": 1.0, "global_step": 2205, "grad_norm": 2.195258399591118, "learning_rate": 4.117933346557293e-07, "loss": 0.3956, "step": 2205 }, { "ETA": 1.73, "epoch": 0.7094388165299887, "fp16_scale": 1.0, "global_step": 2206, "grad_norm": 1.9770106328929318, "learning_rate": 4.109509876452939e-07, "loss": 0.4615, "step": 2206 }, { "ETA": 1.72, "epoch": 0.7097604116417431, "fp16_scale": 1.0, "global_step": 2207, "grad_norm": 1.9116333616905046, "learning_rate": 4.101092801878068e-07, "loss": 0.3323, "step": 2207 }, { "ETA": 1.72, "epoch": 0.7100820067534973, "fp16_scale": 1.0, "global_step": 2208, "grad_norm": 2.0070177339096715, "learning_rate": 4.092682131971421e-07, "loss": 0.5249, "step": 2208 }, { "ETA": 1.72, "epoch": 0.7104036018652516, "fp16_scale": 1.0, "global_step": 2209, "grad_norm": 1.9920602145139588, "learning_rate": 4.0842778758647754e-07, "loss": 0.4085, "step": 2209 }, { "ETA": 1.72, "epoch": 0.710725196977006, "fp16_scale": 1.0, "global_step": 2210, "grad_norm": 2.086253029698317, "learning_rate": 4.0758800426829596e-07, "loss": 0.3245, "step": 2210 }, { "ETA": 1.72, "epoch": 0.7110467920887602, "fp16_scale": 1.0, "global_step": 2211, "grad_norm": 1.8523812149571812, "learning_rate": 4.0674886415438146e-07, "loss": 0.3613, "step": 2211 }, { "ETA": 1.71, "epoch": 0.7113683872005145, "fp16_scale": 1.0, "global_step": 2212, "grad_norm": 1.878433095918315, "learning_rate": 4.05910368155821e-07, "loss": 0.3676, "step": 2212 }, { "ETA": 1.71, "epoch": 0.7116899823122689, "fp16_scale": 1.0, "global_step": 2213, "grad_norm": 2.07030816042548, "learning_rate": 4.050725171830011e-07, "loss": 0.4186, "step": 2213 }, { "ETA": 1.71, "epoch": 0.7120115774240231, "fp16_scale": 1.0, "global_step": 2214, "grad_norm": 1.991886177909802, "learning_rate": 4.042353121456086e-07, "loss": 0.4691, "step": 2214 }, { "ETA": 1.71, "epoch": 0.7123331725357774, "fp16_scale": 1.0, "global_step": 2215, "grad_norm": 2.2279734922669476, "learning_rate": 4.0339875395262937e-07, "loss": 0.4557, "step": 2215 }, { "ETA": 1.71, "epoch": 0.7126547676475318, "fp16_scale": 1.0, "global_step": 2216, "grad_norm": 2.119008274564329, "learning_rate": 4.025628435123457e-07, "loss": 0.3971, "step": 2216 }, { "ETA": 1.7, "epoch": 0.7129763627592861, "fp16_scale": 1.0, "global_step": 2217, "grad_norm": 1.7664745654548895, "learning_rate": 4.017275817323382e-07, "loss": 0.5009, "step": 2217 }, { "ETA": 1.7, "epoch": 0.7132979578710403, "fp16_scale": 1.0, "global_step": 2218, "grad_norm": 1.8939007121730727, "learning_rate": 4.008929695194818e-07, "loss": 0.4417, "step": 2218 }, { "ETA": 1.7, "epoch": 0.7136195529827947, "fp16_scale": 1.0, "global_step": 2219, "grad_norm": 2.017407527687127, "learning_rate": 4.000590077799468e-07, "loss": 0.3961, "step": 2219 }, { "ETA": 1.7, "epoch": 0.713941148094549, "fp16_scale": 1.0, "global_step": 2220, "grad_norm": 2.1317372458820447, "learning_rate": 3.99225697419198e-07, "loss": 0.5324, "step": 2220 }, { "ETA": 1.7, "epoch": 0.7142627432063032, "fp16_scale": 1.0, "global_step": 2221, "grad_norm": 1.981381768894328, "learning_rate": 3.983930393419911e-07, "loss": 0.3655, "step": 2221 }, { "ETA": 1.69, "epoch": 0.7145843383180576, "fp16_scale": 1.0, "global_step": 2222, "grad_norm": 2.3060469712534837, "learning_rate": 3.9756103445237564e-07, "loss": 0.3862, "step": 2222 }, { "ETA": 1.69, "epoch": 0.7149059334298119, "fp16_scale": 1.0, "global_step": 2223, "grad_norm": 2.214203361396411, "learning_rate": 3.967296836536902e-07, "loss": 0.4544, "step": 2223 }, { "ETA": 1.69, "epoch": 0.7152275285415661, "fp16_scale": 1.0, "global_step": 2224, "grad_norm": 2.4956829318349123, "learning_rate": 3.9589898784856435e-07, "loss": 0.4377, "step": 2224 }, { "ETA": 1.69, "epoch": 0.7155491236533205, "fp16_scale": 1.0, "global_step": 2225, "grad_norm": 1.8896479220175337, "learning_rate": 3.9506894793891654e-07, "loss": 0.4722, "step": 2225 }, { "ETA": 1.69, "epoch": 0.7158707187650748, "fp16_scale": 1.0, "global_step": 2226, "grad_norm": 1.8571813908637695, "learning_rate": 3.94239564825952e-07, "loss": 0.3962, "step": 2226 }, { "ETA": 1.68, "epoch": 0.716192313876829, "fp16_scale": 1.0, "global_step": 2227, "grad_norm": 2.193852216256546, "learning_rate": 3.934108394101644e-07, "loss": 0.3723, "step": 2227 }, { "ETA": 1.68, "epoch": 0.7165139089885834, "fp16_scale": 1.0, "global_step": 2228, "grad_norm": 1.8704927468231431, "learning_rate": 3.925827725913315e-07, "loss": 0.4438, "step": 2228 }, { "ETA": 1.68, "epoch": 0.7168355041003377, "fp16_scale": 1.0, "global_step": 2229, "grad_norm": 2.166932787688569, "learning_rate": 3.9175536526851773e-07, "loss": 0.4531, "step": 2229 }, { "ETA": 1.68, "epoch": 0.717157099212092, "fp16_scale": 1.0, "global_step": 2230, "grad_norm": 1.8801807926539045, "learning_rate": 3.9092861834007074e-07, "loss": 0.4107, "step": 2230 }, { "ETA": 1.68, "epoch": 0.7174786943238463, "fp16_scale": 1.0, "global_step": 2231, "grad_norm": 2.276845077870314, "learning_rate": 3.901025327036206e-07, "loss": 0.5075, "step": 2231 }, { "ETA": 1.68, "epoch": 0.7178002894356006, "fp16_scale": 1.0, "global_step": 2232, "grad_norm": 2.160478450654775, "learning_rate": 3.892771092560807e-07, "loss": 0.422, "step": 2232 }, { "ETA": 1.67, "epoch": 0.7181218845473549, "fp16_scale": 1.0, "global_step": 2233, "grad_norm": 2.15339668025265, "learning_rate": 3.8845234889364386e-07, "loss": 0.4008, "step": 2233 }, { "ETA": 1.67, "epoch": 0.7184434796591092, "fp16_scale": 1.0, "global_step": 2234, "grad_norm": 2.0181357334308663, "learning_rate": 3.8762825251178466e-07, "loss": 0.4553, "step": 2234 }, { "ETA": 1.67, "epoch": 0.7187650747708635, "fp16_scale": 1.0, "global_step": 2235, "grad_norm": 2.071829424313654, "learning_rate": 3.868048210052551e-07, "loss": 0.4629, "step": 2235 }, { "ETA": 1.67, "epoch": 0.7190866698826178, "fp16_scale": 1.0, "global_step": 2236, "grad_norm": 2.316577421948334, "learning_rate": 3.859820552680867e-07, "loss": 0.4874, "step": 2236 }, { "ETA": 1.67, "epoch": 0.719408264994372, "fp16_scale": 1.0, "global_step": 2237, "grad_norm": 1.8859096372417163, "learning_rate": 3.851599561935877e-07, "loss": 0.4471, "step": 2237 }, { "ETA": 1.66, "epoch": 0.7197298601061264, "fp16_scale": 1.0, "global_step": 2238, "grad_norm": 2.072028466306422, "learning_rate": 3.843385246743417e-07, "loss": 0.3532, "step": 2238 }, { "ETA": 1.66, "epoch": 0.7200514552178807, "fp16_scale": 1.0, "global_step": 2239, "grad_norm": 2.2699672922909206, "learning_rate": 3.8351776160220894e-07, "loss": 0.5076, "step": 2239 }, { "ETA": 1.66, "epoch": 0.720373050329635, "fp16_scale": 1.0, "global_step": 2240, "grad_norm": 2.1749742047782172, "learning_rate": 3.8269766786832245e-07, "loss": 0.4127, "step": 2240 }, { "ETA": 1.66, "epoch": 0.7206946454413893, "fp16_scale": 1.0, "global_step": 2241, "grad_norm": 1.8004861275069077, "learning_rate": 3.818782443630897e-07, "loss": 0.439, "step": 2241 }, { "ETA": 1.66, "epoch": 0.7210162405531436, "fp16_scale": 1.0, "global_step": 2242, "grad_norm": 1.9692018267027478, "learning_rate": 3.8105949197619e-07, "loss": 0.4527, "step": 2242 }, { "ETA": 1.65, "epoch": 0.7213378356648978, "fp16_scale": 1.0, "global_step": 2243, "grad_norm": 1.8598905917571928, "learning_rate": 3.802414115965736e-07, "loss": 0.3778, "step": 2243 }, { "ETA": 1.65, "epoch": 0.7216594307766522, "fp16_scale": 1.0, "global_step": 2244, "grad_norm": 1.9180948493602807, "learning_rate": 3.794240041124622e-07, "loss": 0.3541, "step": 2244 }, { "ETA": 1.65, "epoch": 0.7219810258884065, "fp16_scale": 1.0, "global_step": 2245, "grad_norm": 2.2140959932998925, "learning_rate": 3.7860727041134553e-07, "loss": 0.4151, "step": 2245 }, { "ETA": 1.65, "epoch": 0.7223026210001608, "fp16_scale": 1.0, "global_step": 2246, "grad_norm": 1.8895681799785997, "learning_rate": 3.7779121137998273e-07, "loss": 0.4091, "step": 2246 }, { "ETA": 1.65, "epoch": 0.7226242161119151, "fp16_scale": 1.0, "global_step": 2247, "grad_norm": 2.154992159649028, "learning_rate": 3.769758279044005e-07, "loss": 0.4338, "step": 2247 }, { "ETA": 1.64, "epoch": 0.7229458112236694, "fp16_scale": 1.0, "global_step": 2248, "grad_norm": 1.9326745202783173, "learning_rate": 3.761611208698912e-07, "loss": 0.346, "step": 2248 }, { "ETA": 1.64, "epoch": 0.7232674063354237, "fp16_scale": 1.0, "global_step": 2249, "grad_norm": 1.976550975144646, "learning_rate": 3.7534709116101383e-07, "loss": 0.3921, "step": 2249 }, { "ETA": 1.64, "epoch": 0.723589001447178, "fp16_scale": 1.0, "global_step": 2250, "grad_norm": 1.8544961526694541, "learning_rate": 3.745337396615909e-07, "loss": 0.3978, "step": 2250 }, { "ETA": 1.64, "epoch": 0.7239105965589323, "fp16_scale": 1.0, "global_step": 2251, "grad_norm": 1.9565133967962072, "learning_rate": 3.737210672547093e-07, "loss": 0.4828, "step": 2251 }, { "ETA": 1.64, "epoch": 0.7242321916706866, "fp16_scale": 1.0, "global_step": 2252, "grad_norm": 1.8860209211586123, "learning_rate": 3.729090748227186e-07, "loss": 0.5123, "step": 2252 }, { "ETA": 1.63, "epoch": 0.7245537867824409, "fp16_scale": 1.0, "global_step": 2253, "grad_norm": 2.0462207476491434, "learning_rate": 3.7209776324723006e-07, "loss": 0.3653, "step": 2253 }, { "ETA": 1.63, "epoch": 0.7248753818941952, "fp16_scale": 1.0, "global_step": 2254, "grad_norm": 1.830256212384786, "learning_rate": 3.7128713340911534e-07, "loss": 0.4377, "step": 2254 }, { "ETA": 1.63, "epoch": 0.7251969770059495, "fp16_scale": 1.0, "global_step": 2255, "grad_norm": 1.7475762399120987, "learning_rate": 3.704771861885058e-07, "loss": 0.4223, "step": 2255 }, { "ETA": 1.63, "epoch": 0.7255185721177038, "fp16_scale": 1.0, "global_step": 2256, "grad_norm": 1.976400225248446, "learning_rate": 3.6966792246479253e-07, "loss": 0.4513, "step": 2256 }, { "ETA": 1.63, "epoch": 0.7258401672294581, "fp16_scale": 1.0, "global_step": 2257, "grad_norm": 2.166177005329153, "learning_rate": 3.6885934311662334e-07, "loss": 0.4618, "step": 2257 }, { "ETA": 1.62, "epoch": 0.7261617623412124, "fp16_scale": 1.0, "global_step": 2258, "grad_norm": 1.8911059330295397, "learning_rate": 3.680514490219041e-07, "loss": 0.4218, "step": 2258 }, { "ETA": 1.62, "epoch": 0.7264833574529667, "fp16_scale": 1.0, "global_step": 2259, "grad_norm": 2.110615272128805, "learning_rate": 3.672442410577965e-07, "loss": 0.4195, "step": 2259 }, { "ETA": 1.62, "epoch": 0.726804952564721, "fp16_scale": 1.0, "global_step": 2260, "grad_norm": 1.8987697768798977, "learning_rate": 3.6643772010071617e-07, "loss": 0.4004, "step": 2260 }, { "ETA": 1.62, "epoch": 0.7271265476764753, "fp16_scale": 1.0, "global_step": 2261, "grad_norm": 2.1312838534787386, "learning_rate": 3.656318870263344e-07, "loss": 0.4779, "step": 2261 }, { "ETA": 1.62, "epoch": 0.7274481427882297, "fp16_scale": 1.0, "global_step": 2262, "grad_norm": 1.8400510723318806, "learning_rate": 3.648267427095741e-07, "loss": 0.4164, "step": 2262 }, { "ETA": 1.62, "epoch": 0.7277697378999839, "fp16_scale": 1.0, "global_step": 2263, "grad_norm": 2.1056389829781277, "learning_rate": 3.6402228802461164e-07, "loss": 0.5058, "step": 2263 }, { "ETA": 1.61, "epoch": 0.7280913330117382, "fp16_scale": 1.0, "global_step": 2264, "grad_norm": 1.851103265295296, "learning_rate": 3.6321852384487395e-07, "loss": 0.4628, "step": 2264 }, { "ETA": 1.61, "epoch": 0.7284129281234926, "fp16_scale": 1.0, "global_step": 2265, "grad_norm": 1.9934810942961594, "learning_rate": 3.624154510430387e-07, "loss": 0.4373, "step": 2265 }, { "ETA": 1.61, "epoch": 0.7287345232352468, "fp16_scale": 1.0, "global_step": 2266, "grad_norm": 2.0880001743606846, "learning_rate": 3.616130704910324e-07, "loss": 0.4658, "step": 2266 }, { "ETA": 1.61, "epoch": 0.7290561183470011, "fp16_scale": 1.0, "global_step": 2267, "grad_norm": 2.2273833324564873, "learning_rate": 3.608113830600299e-07, "loss": 0.4706, "step": 2267 }, { "ETA": 1.61, "epoch": 0.7293777134587555, "fp16_scale": 1.0, "global_step": 2268, "grad_norm": 1.9394095161631035, "learning_rate": 3.6001038962045395e-07, "loss": 0.3588, "step": 2268 }, { "ETA": 1.6, "epoch": 0.7296993085705097, "fp16_scale": 1.0, "global_step": 2269, "grad_norm": 2.2227064661542353, "learning_rate": 3.592100910419738e-07, "loss": 0.3615, "step": 2269 }, { "ETA": 1.6, "epoch": 0.730020903682264, "fp16_scale": 1.0, "global_step": 2270, "grad_norm": 2.0757467341583484, "learning_rate": 3.5841048819350427e-07, "loss": 0.3497, "step": 2270 }, { "ETA": 1.6, "epoch": 0.7303424987940184, "fp16_scale": 1.0, "global_step": 2271, "grad_norm": 1.9640186278757845, "learning_rate": 3.576115819432043e-07, "loss": 0.4717, "step": 2271 }, { "ETA": 1.6, "epoch": 0.7306640939057726, "fp16_scale": 1.0, "global_step": 2272, "grad_norm": 1.9313114039202268, "learning_rate": 3.568133731584767e-07, "loss": 0.4545, "step": 2272 }, { "ETA": 1.6, "epoch": 0.7309856890175269, "fp16_scale": 1.0, "global_step": 2273, "grad_norm": 1.78254561493774, "learning_rate": 3.560158627059676e-07, "loss": 0.4455, "step": 2273 }, { "ETA": 1.59, "epoch": 0.7313072841292813, "fp16_scale": 1.0, "global_step": 2274, "grad_norm": 1.9168963550033165, "learning_rate": 3.552190514515636e-07, "loss": 0.3942, "step": 2274 }, { "ETA": 1.59, "epoch": 0.7316288792410356, "fp16_scale": 1.0, "global_step": 2275, "grad_norm": 1.9743466155000438, "learning_rate": 3.5442294026039433e-07, "loss": 0.4919, "step": 2275 }, { "ETA": 1.59, "epoch": 0.7319504743527898, "fp16_scale": 1.0, "global_step": 2276, "grad_norm": 1.9509055281718113, "learning_rate": 3.5362752999682724e-07, "loss": 0.4776, "step": 2276 }, { "ETA": 1.59, "epoch": 0.7322720694645442, "fp16_scale": 1.0, "global_step": 2277, "grad_norm": 1.9184878740758493, "learning_rate": 3.528328215244695e-07, "loss": 0.3758, "step": 2277 }, { "ETA": 1.59, "epoch": 0.7325936645762985, "fp16_scale": 1.0, "global_step": 2278, "grad_norm": 1.9837661782560243, "learning_rate": 3.5203881570616667e-07, "loss": 0.5061, "step": 2278 }, { "ETA": 1.58, "epoch": 0.7329152596880527, "fp16_scale": 1.0, "global_step": 2279, "grad_norm": 1.9748864180701593, "learning_rate": 3.512455134040008e-07, "loss": 0.447, "step": 2279 }, { "ETA": 1.58, "epoch": 0.733236854799807, "fp16_scale": 1.0, "global_step": 2280, "grad_norm": 2.1059961968364056, "learning_rate": 3.504529154792905e-07, "loss": 0.4355, "step": 2280 }, { "ETA": 1.58, "epoch": 0.7335584499115614, "fp16_scale": 1.0, "global_step": 2281, "grad_norm": 2.077181442498018, "learning_rate": 3.4966102279258956e-07, "loss": 0.4614, "step": 2281 }, { "ETA": 1.58, "epoch": 0.7338800450233156, "fp16_scale": 1.0, "global_step": 2282, "grad_norm": 1.9069189086089477, "learning_rate": 3.488698362036865e-07, "loss": 0.4018, "step": 2282 }, { "ETA": 1.58, "epoch": 0.7342016401350699, "fp16_scale": 1.0, "global_step": 2283, "grad_norm": 1.9092208735298126, "learning_rate": 3.4807935657160237e-07, "loss": 0.3999, "step": 2283 }, { "ETA": 1.58, "epoch": 0.7345232352468243, "fp16_scale": 1.0, "global_step": 2284, "grad_norm": 2.433976642012897, "learning_rate": 3.472895847545905e-07, "loss": 0.3816, "step": 2284 }, { "ETA": 1.57, "epoch": 0.7348448303585785, "fp16_scale": 1.0, "global_step": 2285, "grad_norm": 2.032070228982937, "learning_rate": 3.4650052161013675e-07, "loss": 0.4042, "step": 2285 }, { "ETA": 1.57, "epoch": 0.7351664254703328, "fp16_scale": 1.0, "global_step": 2286, "grad_norm": 1.9062000739290756, "learning_rate": 3.4571216799495694e-07, "loss": 0.4139, "step": 2286 }, { "ETA": 1.57, "epoch": 0.7354880205820872, "fp16_scale": 1.0, "global_step": 2287, "grad_norm": 1.9101721394666102, "learning_rate": 3.44924524764997e-07, "loss": 0.407, "step": 2287 }, { "ETA": 1.57, "epoch": 0.7358096156938414, "fp16_scale": 1.0, "global_step": 2288, "grad_norm": 2.027508835356294, "learning_rate": 3.441375927754309e-07, "loss": 0.3725, "step": 2288 }, { "ETA": 1.57, "epoch": 0.7361312108055957, "fp16_scale": 1.0, "global_step": 2289, "grad_norm": 2.0837935450157685, "learning_rate": 3.4335137288066006e-07, "loss": 0.4872, "step": 2289 }, { "ETA": 1.56, "epoch": 0.7364528059173501, "fp16_scale": 1.0, "global_step": 2290, "grad_norm": 1.936697090462457, "learning_rate": 3.4256586593431404e-07, "loss": 0.5331, "step": 2290 }, { "ETA": 1.56, "epoch": 0.7367744010291044, "fp16_scale": 1.0, "global_step": 2291, "grad_norm": 1.8008781436797965, "learning_rate": 3.417810727892475e-07, "loss": 0.4559, "step": 2291 }, { "ETA": 1.56, "epoch": 0.7370959961408586, "fp16_scale": 1.0, "global_step": 2292, "grad_norm": 1.8397839886723912, "learning_rate": 3.409969942975407e-07, "loss": 0.4461, "step": 2292 }, { "ETA": 1.56, "epoch": 0.737417591252613, "fp16_scale": 1.0, "global_step": 2293, "grad_norm": 1.942121214519844, "learning_rate": 3.4021363131049665e-07, "loss": 0.3783, "step": 2293 }, { "ETA": 1.56, "epoch": 0.7377391863643673, "fp16_scale": 1.0, "global_step": 2294, "grad_norm": 2.0805562914405806, "learning_rate": 3.3943098467864315e-07, "loss": 0.4683, "step": 2294 }, { "ETA": 1.55, "epoch": 0.7380607814761215, "fp16_scale": 1.0, "global_step": 2295, "grad_norm": 2.100925544342399, "learning_rate": 3.3864905525172913e-07, "loss": 0.4538, "step": 2295 }, { "ETA": 1.55, "epoch": 0.7383823765878759, "fp16_scale": 1.0, "global_step": 2296, "grad_norm": 2.0389002485397487, "learning_rate": 3.378678438787246e-07, "loss": 0.4396, "step": 2296 }, { "ETA": 1.55, "epoch": 0.7387039716996302, "fp16_scale": 1.0, "global_step": 2297, "grad_norm": 1.7637039918390904, "learning_rate": 3.370873514078215e-07, "loss": 0.3777, "step": 2297 }, { "ETA": 1.55, "epoch": 0.7390255668113844, "fp16_scale": 1.0, "global_step": 2298, "grad_norm": 1.8904305792234644, "learning_rate": 3.3630757868642965e-07, "loss": 0.4037, "step": 2298 }, { "ETA": 1.55, "epoch": 0.7393471619231388, "fp16_scale": 1.0, "global_step": 2299, "grad_norm": 1.9799632883906064, "learning_rate": 3.3552852656117837e-07, "loss": 0.4252, "step": 2299 }, { "ETA": 1.54, "epoch": 0.7396687570348931, "fp16_scale": 1.0, "global_step": 2300, "grad_norm": 2.0009972657069133, "learning_rate": 3.34750195877914e-07, "loss": 0.3857, "step": 2300 }, { "ETA": 1.54, "epoch": 0.7399903521466473, "fp16_scale": 1.0, "global_step": 2301, "grad_norm": 1.8936424284333242, "learning_rate": 3.339725874816994e-07, "loss": 0.406, "step": 2301 }, { "ETA": 1.54, "epoch": 0.7403119472584017, "fp16_scale": 1.0, "global_step": 2302, "grad_norm": 2.056499301209626, "learning_rate": 3.3319570221681404e-07, "loss": 0.4455, "step": 2302 }, { "ETA": 1.54, "epoch": 0.740633542370156, "fp16_scale": 1.0, "global_step": 2303, "grad_norm": 2.072575629275413, "learning_rate": 3.3241954092675186e-07, "loss": 0.3989, "step": 2303 }, { "ETA": 1.54, "epoch": 0.7409551374819102, "fp16_scale": 1.0, "global_step": 2304, "grad_norm": 1.8390868300147452, "learning_rate": 3.31644104454221e-07, "loss": 0.4202, "step": 2304 }, { "ETA": 1.53, "epoch": 0.7412767325936646, "fp16_scale": 1.0, "global_step": 2305, "grad_norm": 1.9442922194970502, "learning_rate": 3.308693936411421e-07, "loss": 0.3555, "step": 2305 }, { "ETA": 1.53, "epoch": 0.7415983277054189, "fp16_scale": 1.0, "global_step": 2306, "grad_norm": 1.9263437995603583, "learning_rate": 3.3009540932864777e-07, "loss": 0.4344, "step": 2306 }, { "ETA": 1.53, "epoch": 0.7419199228171732, "fp16_scale": 1.0, "global_step": 2307, "grad_norm": 1.7755929053575106, "learning_rate": 3.293221523570826e-07, "loss": 0.4771, "step": 2307 }, { "ETA": 1.53, "epoch": 0.7422415179289275, "fp16_scale": 1.0, "global_step": 2308, "grad_norm": 2.00979769118329, "learning_rate": 3.2854962356600126e-07, "loss": 0.4371, "step": 2308 }, { "ETA": 1.53, "epoch": 0.7425631130406818, "fp16_scale": 1.0, "global_step": 2309, "grad_norm": 2.3919546338784436, "learning_rate": 3.2777782379416796e-07, "loss": 0.3822, "step": 2309 }, { "ETA": 1.53, "epoch": 0.7428847081524361, "fp16_scale": 1.0, "global_step": 2310, "grad_norm": 1.9899339960693896, "learning_rate": 3.2700675387955434e-07, "loss": 0.4021, "step": 2310 }, { "ETA": 1.52, "epoch": 0.7432063032641903, "fp16_scale": 1.0, "global_step": 2311, "grad_norm": 1.9185900559349034, "learning_rate": 3.2623641465934114e-07, "loss": 0.4688, "step": 2311 }, { "ETA": 1.52, "epoch": 0.7435278983759447, "fp16_scale": 1.0, "global_step": 2312, "grad_norm": 1.8672071580120455, "learning_rate": 3.2546680696991437e-07, "loss": 0.4186, "step": 2312 }, { "ETA": 1.52, "epoch": 0.743849493487699, "fp16_scale": 1.0, "global_step": 2313, "grad_norm": 1.9660977148948693, "learning_rate": 3.246979316468665e-07, "loss": 0.4413, "step": 2313 }, { "ETA": 1.52, "epoch": 0.7441710885994532, "fp16_scale": 1.0, "global_step": 2314, "grad_norm": 1.6828170728926204, "learning_rate": 3.239297895249955e-07, "loss": 0.4026, "step": 2314 }, { "ETA": 1.52, "epoch": 0.7444926837112076, "fp16_scale": 1.0, "global_step": 2315, "grad_norm": 2.1493388414577725, "learning_rate": 3.2316238143830143e-07, "loss": 0.3468, "step": 2315 }, { "ETA": 1.51, "epoch": 0.7448142788229619, "fp16_scale": 1.0, "global_step": 2316, "grad_norm": 2.3164200650949742, "learning_rate": 3.223957082199895e-07, "loss": 0.4149, "step": 2316 }, { "ETA": 1.51, "epoch": 0.7451358739347161, "fp16_scale": 1.0, "global_step": 2317, "grad_norm": 1.9651991773040327, "learning_rate": 3.2162977070246545e-07, "loss": 0.4702, "step": 2317 }, { "ETA": 1.51, "epoch": 0.7454574690464705, "fp16_scale": 1.0, "global_step": 2318, "grad_norm": 1.9553953252173792, "learning_rate": 3.208645697173362e-07, "loss": 0.4139, "step": 2318 }, { "ETA": 1.51, "epoch": 0.7457790641582248, "fp16_scale": 1.0, "global_step": 2319, "grad_norm": 1.9427431440690355, "learning_rate": 3.2010010609541104e-07, "loss": 0.4283, "step": 2319 }, { "ETA": 1.51, "epoch": 0.7461006592699791, "fp16_scale": 1.0, "global_step": 2320, "grad_norm": 1.8627030964806996, "learning_rate": 3.193363806666961e-07, "loss": 0.4009, "step": 2320 }, { "ETA": 1.5, "epoch": 0.7464222543817334, "fp16_scale": 1.0, "global_step": 2321, "grad_norm": 2.0199565518748193, "learning_rate": 3.18573394260398e-07, "loss": 0.3448, "step": 2321 }, { "ETA": 1.5, "epoch": 0.7467438494934877, "fp16_scale": 1.0, "global_step": 2322, "grad_norm": 1.698843236949823, "learning_rate": 3.1781114770491966e-07, "loss": 0.355, "step": 2322 }, { "ETA": 1.5, "epoch": 0.747065444605242, "fp16_scale": 1.0, "global_step": 2323, "grad_norm": 2.002294199773455, "learning_rate": 3.1704964182786085e-07, "loss": 0.4127, "step": 2323 }, { "ETA": 1.5, "epoch": 0.7473870397169963, "fp16_scale": 1.0, "global_step": 2324, "grad_norm": 1.8715369845319172, "learning_rate": 3.1628887745601807e-07, "loss": 0.3584, "step": 2324 }, { "ETA": 1.5, "epoch": 0.7477086348287506, "fp16_scale": 1.0, "global_step": 2325, "grad_norm": 1.8158810424221832, "learning_rate": 3.155288554153819e-07, "loss": 0.4242, "step": 2325 }, { "ETA": 1.49, "epoch": 0.7480302299405049, "fp16_scale": 1.0, "global_step": 2326, "grad_norm": 2.097106697099669, "learning_rate": 3.147695765311377e-07, "loss": 0.4529, "step": 2326 }, { "ETA": 1.49, "epoch": 0.7483518250522592, "fp16_scale": 1.0, "global_step": 2327, "grad_norm": 2.0899355326634392, "learning_rate": 3.140110416276627e-07, "loss": 0.466, "step": 2327 }, { "ETA": 1.49, "epoch": 0.7486734201640135, "fp16_scale": 1.0, "global_step": 2328, "grad_norm": 2.2487782125918603, "learning_rate": 3.132532515285279e-07, "loss": 0.3965, "step": 2328 }, { "ETA": 1.49, "epoch": 0.7489950152757678, "fp16_scale": 1.0, "global_step": 2329, "grad_norm": 1.8858384265180017, "learning_rate": 3.1249620705649416e-07, "loss": 0.4482, "step": 2329 }, { "ETA": 1.49, "epoch": 0.7493166103875221, "fp16_scale": 1.0, "global_step": 2330, "grad_norm": 2.0794448090461506, "learning_rate": 3.1173990903351386e-07, "loss": 0.5206, "step": 2330 }, { "ETA": 1.48, "epoch": 0.7496382054992764, "fp16_scale": 1.0, "global_step": 2331, "grad_norm": 1.9262669051509544, "learning_rate": 3.109843582807289e-07, "loss": 0.3939, "step": 2331 }, { "ETA": 1.48, "epoch": 0.7499598006110307, "fp16_scale": 1.0, "global_step": 2332, "grad_norm": 2.019912618397986, "learning_rate": 3.1022955561846875e-07, "loss": 0.499, "step": 2332 }, { "ETA": 1.48, "epoch": 0.750281395722785, "fp16_scale": 1.0, "global_step": 2333, "grad_norm": 2.1215830733501826, "learning_rate": 3.0947550186625226e-07, "loss": 0.3759, "step": 2333 }, { "ETA": 1.48, "epoch": 0.7506029908345393, "fp16_scale": 1.0, "global_step": 2334, "grad_norm": 1.9013856707341015, "learning_rate": 3.0872219784278354e-07, "loss": 0.3687, "step": 2334 }, { "ETA": 1.48, "epoch": 0.7509245859462936, "fp16_scale": 1.0, "global_step": 2335, "grad_norm": 1.7948480691627944, "learning_rate": 3.0796964436595376e-07, "loss": 0.3836, "step": 2335 }, { "ETA": 1.47, "epoch": 0.751246181058048, "fp16_scale": 1.0, "global_step": 2336, "grad_norm": 2.116207803452061, "learning_rate": 3.072178422528392e-07, "loss": 0.4881, "step": 2336 }, { "ETA": 1.47, "epoch": 0.7515677761698022, "fp16_scale": 1.0, "global_step": 2337, "grad_norm": 2.049696974555666, "learning_rate": 3.0646679231969954e-07, "loss": 0.464, "step": 2337 }, { "ETA": 1.47, "epoch": 0.7518893712815565, "fp16_scale": 1.0, "global_step": 2338, "grad_norm": 1.8532413576947044, "learning_rate": 3.057164953819787e-07, "loss": 0.3691, "step": 2338 }, { "ETA": 1.47, "epoch": 0.7522109663933109, "fp16_scale": 1.0, "global_step": 2339, "grad_norm": 1.8678216549910127, "learning_rate": 3.0496695225430234e-07, "loss": 0.3593, "step": 2339 }, { "ETA": 1.47, "epoch": 0.7525325615050651, "fp16_scale": 1.0, "global_step": 2340, "grad_norm": 2.1001635734513315, "learning_rate": 3.0421816375047835e-07, "loss": 0.3875, "step": 2340 }, { "ETA": 1.46, "epoch": 0.7528541566168194, "fp16_scale": 1.0, "global_step": 2341, "grad_norm": 1.8501131900814614, "learning_rate": 3.034701306834944e-07, "loss": 0.3891, "step": 2341 }, { "ETA": 1.46, "epoch": 0.7531757517285738, "fp16_scale": 1.0, "global_step": 2342, "grad_norm": 1.9754992574675254, "learning_rate": 3.0272285386551867e-07, "loss": 0.462, "step": 2342 }, { "ETA": 1.46, "epoch": 0.753497346840328, "fp16_scale": 1.0, "global_step": 2343, "grad_norm": 2.1449968620130138, "learning_rate": 3.019763341078986e-07, "loss": 0.4149, "step": 2343 }, { "ETA": 1.46, "epoch": 0.7538189419520823, "fp16_scale": 1.0, "global_step": 2344, "grad_norm": 2.1797413203765705, "learning_rate": 3.012305722211583e-07, "loss": 0.3578, "step": 2344 }, { "ETA": 1.46, "epoch": 0.7541405370638367, "fp16_scale": 1.0, "global_step": 2345, "grad_norm": 2.1557219054361143, "learning_rate": 3.0048556901500067e-07, "loss": 0.433, "step": 2345 }, { "ETA": 1.46, "epoch": 0.7544621321755909, "fp16_scale": 1.0, "global_step": 2346, "grad_norm": 1.8652460311633114, "learning_rate": 2.997413252983033e-07, "loss": 0.3479, "step": 2346 }, { "ETA": 1.45, "epoch": 0.7547837272873452, "fp16_scale": 1.0, "global_step": 2347, "grad_norm": 1.802175048918983, "learning_rate": 2.9899784187912027e-07, "loss": 0.4433, "step": 2347 }, { "ETA": 1.45, "epoch": 0.7551053223990996, "fp16_scale": 1.0, "global_step": 2348, "grad_norm": 1.9417291380518882, "learning_rate": 2.982551195646801e-07, "loss": 0.4191, "step": 2348 }, { "ETA": 1.45, "epoch": 0.7554269175108539, "fp16_scale": 1.0, "global_step": 2349, "grad_norm": 2.0141324436262273, "learning_rate": 2.975131591613842e-07, "loss": 0.4138, "step": 2349 }, { "ETA": 1.45, "epoch": 0.7557485126226081, "fp16_scale": 1.0, "global_step": 2350, "grad_norm": 2.0507366630364565, "learning_rate": 2.9677196147480786e-07, "loss": 0.3968, "step": 2350 }, { "ETA": 1.45, "epoch": 0.7560701077343625, "fp16_scale": 1.0, "global_step": 2351, "grad_norm": 1.8831942011400506, "learning_rate": 2.960315273096968e-07, "loss": 0.3856, "step": 2351 }, { "ETA": 1.44, "epoch": 0.7563917028461168, "fp16_scale": 1.0, "global_step": 2352, "grad_norm": 2.0522410847350034, "learning_rate": 2.952918574699692e-07, "loss": 0.4369, "step": 2352 }, { "ETA": 1.44, "epoch": 0.756713297957871, "fp16_scale": 1.0, "global_step": 2353, "grad_norm": 1.8757701859960927, "learning_rate": 2.9455295275871294e-07, "loss": 0.5093, "step": 2353 }, { "ETA": 1.44, "epoch": 0.7570348930696253, "fp16_scale": 1.0, "global_step": 2354, "grad_norm": 1.9690277816933504, "learning_rate": 2.938148139781844e-07, "loss": 0.4554, "step": 2354 }, { "ETA": 1.44, "epoch": 0.7573564881813797, "fp16_scale": 1.0, "global_step": 2355, "grad_norm": 1.8450578782643, "learning_rate": 2.930774419298097e-07, "loss": 0.4056, "step": 2355 }, { "ETA": 1.44, "epoch": 0.7576780832931339, "fp16_scale": 1.0, "global_step": 2356, "grad_norm": 1.7034100050008794, "learning_rate": 2.923408374141808e-07, "loss": 0.3957, "step": 2356 }, { "ETA": 1.43, "epoch": 0.7579996784048882, "fp16_scale": 1.0, "global_step": 2357, "grad_norm": 2.382291209090017, "learning_rate": 2.91605001231058e-07, "loss": 0.3588, "step": 2357 }, { "ETA": 1.43, "epoch": 0.7583212735166426, "fp16_scale": 1.0, "global_step": 2358, "grad_norm": 1.847158745485686, "learning_rate": 2.9086993417936667e-07, "loss": 0.3489, "step": 2358 }, { "ETA": 1.43, "epoch": 0.7586428686283968, "fp16_scale": 1.0, "global_step": 2359, "grad_norm": 2.1251062018931717, "learning_rate": 2.9013563705719667e-07, "loss": 0.4184, "step": 2359 }, { "ETA": 1.43, "epoch": 0.7589644637401511, "fp16_scale": 1.0, "global_step": 2360, "grad_norm": 1.9086332649739344, "learning_rate": 2.89402110661803e-07, "loss": 0.4698, "step": 2360 }, { "ETA": 1.43, "epoch": 0.7592860588519055, "fp16_scale": 1.0, "global_step": 2361, "grad_norm": 1.8856253887660206, "learning_rate": 2.886693557896024e-07, "loss": 0.5095, "step": 2361 }, { "ETA": 1.42, "epoch": 0.7596076539636597, "fp16_scale": 1.0, "global_step": 2362, "grad_norm": 1.8381726038916812, "learning_rate": 2.879373732361755e-07, "loss": 0.3904, "step": 2362 }, { "ETA": 1.42, "epoch": 0.759929249075414, "fp16_scale": 1.0, "global_step": 2363, "grad_norm": 1.9701301999772292, "learning_rate": 2.8720616379626295e-07, "loss": 0.3614, "step": 2363 }, { "ETA": 1.42, "epoch": 0.7602508441871684, "fp16_scale": 1.0, "global_step": 2364, "grad_norm": 1.9254175024012488, "learning_rate": 2.86475728263767e-07, "loss": 0.3778, "step": 2364 }, { "ETA": 1.42, "epoch": 0.7605724392989227, "fp16_scale": 1.0, "global_step": 2365, "grad_norm": 1.8753050225093335, "learning_rate": 2.857460674317498e-07, "loss": 0.4242, "step": 2365 }, { "ETA": 1.42, "epoch": 0.7608940344106769, "fp16_scale": 1.0, "global_step": 2366, "grad_norm": 2.0529313455452076, "learning_rate": 2.85017182092431e-07, "loss": 0.3934, "step": 2366 }, { "ETA": 1.41, "epoch": 0.7612156295224313, "fp16_scale": 1.0, "global_step": 2367, "grad_norm": 2.0042601366074413, "learning_rate": 2.842890730371901e-07, "loss": 0.5309, "step": 2367 }, { "ETA": 1.41, "epoch": 0.7615372246341856, "fp16_scale": 1.0, "global_step": 2368, "grad_norm": 1.9441968238348006, "learning_rate": 2.8356174105656194e-07, "loss": 0.4102, "step": 2368 }, { "ETA": 1.41, "epoch": 0.7618588197459398, "fp16_scale": 1.0, "global_step": 2369, "grad_norm": 1.841715153459455, "learning_rate": 2.82835186940239e-07, "loss": 0.5004, "step": 2369 }, { "ETA": 1.41, "epoch": 0.7621804148576942, "fp16_scale": 1.0, "global_step": 2370, "grad_norm": 2.018858225491701, "learning_rate": 2.8210941147706914e-07, "loss": 0.4003, "step": 2370 }, { "ETA": 1.41, "epoch": 0.7625020099694485, "fp16_scale": 1.0, "global_step": 2371, "grad_norm": 2.023807966756412, "learning_rate": 2.8138441545505365e-07, "loss": 0.4476, "step": 2371 }, { "ETA": 1.41, "epoch": 0.7628236050812027, "fp16_scale": 1.0, "global_step": 2372, "grad_norm": 1.7877444415267678, "learning_rate": 2.8066019966134904e-07, "loss": 0.4067, "step": 2372 }, { "ETA": 1.4, "epoch": 0.7631452001929571, "fp16_scale": 1.0, "global_step": 2373, "grad_norm": 1.9810597265207228, "learning_rate": 2.7993676488226334e-07, "loss": 0.3652, "step": 2373 }, { "ETA": 1.4, "epoch": 0.7634667953047114, "fp16_scale": 1.0, "global_step": 2374, "grad_norm": 1.8176940727449031, "learning_rate": 2.792141119032575e-07, "loss": 0.4308, "step": 2374 }, { "ETA": 1.4, "epoch": 0.7637883904164656, "fp16_scale": 1.0, "global_step": 2375, "grad_norm": 1.8175822612335957, "learning_rate": 2.784922415089438e-07, "loss": 0.3414, "step": 2375 }, { "ETA": 1.4, "epoch": 0.76410998552822, "fp16_scale": 1.0, "global_step": 2376, "grad_norm": 1.8542997787006494, "learning_rate": 2.7777115448308373e-07, "loss": 0.4791, "step": 2376 }, { "ETA": 1.4, "epoch": 0.7644315806399743, "fp16_scale": 1.0, "global_step": 2377, "grad_norm": 1.906502731802614, "learning_rate": 2.7705085160858955e-07, "loss": 0.4136, "step": 2377 }, { "ETA": 1.39, "epoch": 0.7647531757517285, "fp16_scale": 1.0, "global_step": 2378, "grad_norm": 2.107113884405421, "learning_rate": 2.7633133366752094e-07, "loss": 0.4473, "step": 2378 }, { "ETA": 1.39, "epoch": 0.7650747708634829, "fp16_scale": 1.0, "global_step": 2379, "grad_norm": 2.0367569002273727, "learning_rate": 2.7561260144108624e-07, "loss": 0.4173, "step": 2379 }, { "ETA": 1.39, "epoch": 0.7653963659752372, "fp16_scale": 1.0, "global_step": 2380, "grad_norm": 1.9880387158551962, "learning_rate": 2.7489465570964065e-07, "loss": 0.4354, "step": 2380 }, { "ETA": 1.39, "epoch": 0.7657179610869915, "fp16_scale": 1.0, "global_step": 2381, "grad_norm": 2.0044324427964866, "learning_rate": 2.741774972526847e-07, "loss": 0.4979, "step": 2381 }, { "ETA": 1.39, "epoch": 0.7660395561987458, "fp16_scale": 1.0, "global_step": 2382, "grad_norm": 2.077523049347562, "learning_rate": 2.7346112684886516e-07, "loss": 0.4095, "step": 2382 }, { "ETA": 1.38, "epoch": 0.7663611513105001, "fp16_scale": 1.0, "global_step": 2383, "grad_norm": 2.0103024321551763, "learning_rate": 2.7274554527597206e-07, "loss": 0.4386, "step": 2383 }, { "ETA": 1.38, "epoch": 0.7666827464222544, "fp16_scale": 1.0, "global_step": 2384, "grad_norm": 1.9720379413104863, "learning_rate": 2.7203075331094014e-07, "loss": 0.3507, "step": 2384 }, { "ETA": 1.38, "epoch": 0.7670043415340086, "fp16_scale": 1.0, "global_step": 2385, "grad_norm": 2.0404184176861273, "learning_rate": 2.7131675172984556e-07, "loss": 0.4214, "step": 2385 }, { "ETA": 1.38, "epoch": 0.767325936645763, "fp16_scale": 1.0, "global_step": 2386, "grad_norm": 1.6426678670639059, "learning_rate": 2.7060354130790795e-07, "loss": 0.4091, "step": 2386 }, { "ETA": 1.38, "epoch": 0.7676475317575173, "fp16_scale": 1.0, "global_step": 2387, "grad_norm": 1.876970222583723, "learning_rate": 2.698911228194867e-07, "loss": 0.3538, "step": 2387 }, { "ETA": 1.37, "epoch": 0.7679691268692715, "fp16_scale": 1.0, "global_step": 2388, "grad_norm": 1.981909107063528, "learning_rate": 2.6917949703808107e-07, "loss": 0.4145, "step": 2388 }, { "ETA": 1.37, "epoch": 0.7682907219810259, "fp16_scale": 1.0, "global_step": 2389, "grad_norm": 1.99038324454389, "learning_rate": 2.6846866473633124e-07, "loss": 0.4371, "step": 2389 }, { "ETA": 1.37, "epoch": 0.7686123170927802, "fp16_scale": 1.0, "global_step": 2390, "grad_norm": 2.232733886614169, "learning_rate": 2.67758626686014e-07, "loss": 0.4094, "step": 2390 }, { "ETA": 1.37, "epoch": 0.7689339122045344, "fp16_scale": 1.0, "global_step": 2391, "grad_norm": 2.0280191977141904, "learning_rate": 2.670493836580453e-07, "loss": 0.4044, "step": 2391 }, { "ETA": 1.37, "epoch": 0.7692555073162888, "fp16_scale": 1.0, "global_step": 2392, "grad_norm": 1.969272510274025, "learning_rate": 2.6634093642247737e-07, "loss": 0.4537, "step": 2392 }, { "ETA": 1.37, "epoch": 0.7695771024280431, "fp16_scale": 1.0, "global_step": 2393, "grad_norm": 1.872811807796311, "learning_rate": 2.6563328574849775e-07, "loss": 0.4049, "step": 2393 }, { "ETA": 1.36, "epoch": 0.7698986975397974, "fp16_scale": 1.0, "global_step": 2394, "grad_norm": 1.9581733890124202, "learning_rate": 2.649264324044306e-07, "loss": 0.3751, "step": 2394 }, { "ETA": 1.36, "epoch": 0.7702202926515517, "fp16_scale": 1.0, "global_step": 2395, "grad_norm": 1.9921380649074314, "learning_rate": 2.642203771577326e-07, "loss": 0.4639, "step": 2395 }, { "ETA": 1.36, "epoch": 0.770541887763306, "fp16_scale": 1.0, "global_step": 2396, "grad_norm": 2.255240623215617, "learning_rate": 2.635151207749953e-07, "loss": 0.4606, "step": 2396 }, { "ETA": 1.36, "epoch": 0.7708634828750603, "fp16_scale": 1.0, "global_step": 2397, "grad_norm": 2.0052310782073333, "learning_rate": 2.628106640219424e-07, "loss": 0.4452, "step": 2397 }, { "ETA": 1.36, "epoch": 0.7711850779868146, "fp16_scale": 1.0, "global_step": 2398, "grad_norm": 1.8495776836523259, "learning_rate": 2.621070076634296e-07, "loss": 0.3467, "step": 2398 }, { "ETA": 1.35, "epoch": 0.7715066730985689, "fp16_scale": 1.0, "global_step": 2399, "grad_norm": 1.9785103009015907, "learning_rate": 2.614041524634434e-07, "loss": 0.3818, "step": 2399 }, { "ETA": 1.35, "epoch": 0.7718282682103232, "fp16_scale": 1.0, "global_step": 2400, "grad_norm": 2.1935046919910555, "learning_rate": 2.6070209918509977e-07, "loss": 0.4879, "step": 2400 }, { "ETA": 1.35, "epoch": 0.7721498633220775, "fp16_scale": 1.0, "global_step": 2401, "grad_norm": 2.2724570068349834, "learning_rate": 2.600008485906453e-07, "loss": 0.4166, "step": 2401 }, { "ETA": 1.35, "epoch": 0.7724714584338318, "fp16_scale": 1.0, "global_step": 2402, "grad_norm": 2.2781218984515244, "learning_rate": 2.593004014414544e-07, "loss": 0.4997, "step": 2402 }, { "ETA": 1.35, "epoch": 0.7727930535455861, "fp16_scale": 1.0, "global_step": 2403, "grad_norm": 2.167867864658568, "learning_rate": 2.5860075849802943e-07, "loss": 0.4464, "step": 2403 }, { "ETA": 1.35, "epoch": 0.7731146486573404, "fp16_scale": 1.0, "global_step": 2404, "grad_norm": 1.8598594573970242, "learning_rate": 2.5790192051999917e-07, "loss": 0.3918, "step": 2404 }, { "ETA": 1.35, "epoch": 0.7734362437690947, "fp16_scale": 1.0, "global_step": 2405, "grad_norm": 1.9770572453766835, "learning_rate": 2.572038882661183e-07, "loss": 0.4527, "step": 2405 }, { "ETA": 1.34, "epoch": 0.773757838880849, "fp16_scale": 1.0, "global_step": 2406, "grad_norm": 1.7657481931797248, "learning_rate": 2.565066624942677e-07, "loss": 0.3527, "step": 2406 }, { "ETA": 1.34, "epoch": 0.7740794339926033, "fp16_scale": 1.0, "global_step": 2407, "grad_norm": 2.1870135146732643, "learning_rate": 2.558102439614511e-07, "loss": 0.383, "step": 2407 }, { "ETA": 1.34, "epoch": 0.7744010291043576, "fp16_scale": 1.0, "global_step": 2408, "grad_norm": 1.8817821440102491, "learning_rate": 2.5511463342379714e-07, "loss": 0.4598, "step": 2408 }, { "ETA": 1.34, "epoch": 0.7747226242161119, "fp16_scale": 1.0, "global_step": 2409, "grad_norm": 1.957924103152357, "learning_rate": 2.54419831636557e-07, "loss": 0.4187, "step": 2409 }, { "ETA": 1.34, "epoch": 0.7750442193278663, "fp16_scale": 1.0, "global_step": 2410, "grad_norm": 1.8293307605639495, "learning_rate": 2.5372583935410274e-07, "loss": 0.4452, "step": 2410 }, { "ETA": 1.33, "epoch": 0.7753658144396205, "fp16_scale": 1.0, "global_step": 2411, "grad_norm": 1.95054798880309, "learning_rate": 2.5303265732992885e-07, "loss": 0.4174, "step": 2411 }, { "ETA": 1.33, "epoch": 0.7756874095513748, "fp16_scale": 1.0, "global_step": 2412, "grad_norm": 1.9225610623389644, "learning_rate": 2.5234028631664884e-07, "loss": 0.4868, "step": 2412 }, { "ETA": 1.33, "epoch": 0.7760090046631292, "fp16_scale": 1.0, "global_step": 2413, "grad_norm": 1.9580567396648731, "learning_rate": 2.516487270659966e-07, "loss": 0.4789, "step": 2413 }, { "ETA": 1.33, "epoch": 0.7763305997748834, "fp16_scale": 1.0, "global_step": 2414, "grad_norm": 2.0889732155814107, "learning_rate": 2.5095798032882443e-07, "loss": 0.441, "step": 2414 }, { "ETA": 1.33, "epoch": 0.7766521948866377, "fp16_scale": 1.0, "global_step": 2415, "grad_norm": 2.2663999415670575, "learning_rate": 2.502680468551025e-07, "loss": 0.4823, "step": 2415 }, { "ETA": 1.32, "epoch": 0.7769737899983921, "fp16_scale": 1.0, "global_step": 2416, "grad_norm": 2.1777402998300217, "learning_rate": 2.495789273939176e-07, "loss": 0.4474, "step": 2416 }, { "ETA": 1.32, "epoch": 0.7772953851101463, "fp16_scale": 1.0, "global_step": 2417, "grad_norm": 1.9846358739140422, "learning_rate": 2.4889062269347284e-07, "loss": 0.4434, "step": 2417 }, { "ETA": 1.32, "epoch": 0.7776169802219006, "fp16_scale": 1.0, "global_step": 2418, "grad_norm": 2.052147639969581, "learning_rate": 2.48203133501087e-07, "loss": 0.4001, "step": 2418 }, { "ETA": 1.32, "epoch": 0.777938575333655, "fp16_scale": 1.0, "global_step": 2419, "grad_norm": 2.228658611393801, "learning_rate": 2.475164605631933e-07, "loss": 0.4636, "step": 2419 }, { "ETA": 1.32, "epoch": 0.7782601704454092, "fp16_scale": 1.0, "global_step": 2420, "grad_norm": 2.1585999473649693, "learning_rate": 2.46830604625339e-07, "loss": 0.3789, "step": 2420 }, { "ETA": 1.31, "epoch": 0.7785817655571635, "fp16_scale": 1.0, "global_step": 2421, "grad_norm": 2.1549970635685662, "learning_rate": 2.4614556643218376e-07, "loss": 0.4867, "step": 2421 }, { "ETA": 1.31, "epoch": 0.7789033606689179, "fp16_scale": 1.0, "global_step": 2422, "grad_norm": 1.742901679031034, "learning_rate": 2.4546134672749943e-07, "loss": 0.3308, "step": 2422 }, { "ETA": 1.31, "epoch": 0.7792249557806721, "fp16_scale": 1.0, "global_step": 2423, "grad_norm": 2.0792519984779365, "learning_rate": 2.4477794625416945e-07, "loss": 0.438, "step": 2423 }, { "ETA": 1.31, "epoch": 0.7795465508924264, "fp16_scale": 1.0, "global_step": 2424, "grad_norm": 1.9555432825383154, "learning_rate": 2.4409536575418786e-07, "loss": 0.4247, "step": 2424 }, { "ETA": 1.31, "epoch": 0.7798681460041808, "fp16_scale": 1.0, "global_step": 2425, "grad_norm": 2.170058553322506, "learning_rate": 2.4341360596865865e-07, "loss": 0.3825, "step": 2425 }, { "ETA": 1.3, "epoch": 0.7801897411159351, "fp16_scale": 1.0, "global_step": 2426, "grad_norm": 2.025218796942112, "learning_rate": 2.427326676377939e-07, "loss": 0.3754, "step": 2426 }, { "ETA": 1.3, "epoch": 0.7805113362276893, "fp16_scale": 1.0, "global_step": 2427, "grad_norm": 2.138378640496298, "learning_rate": 2.4205255150091465e-07, "loss": 0.5007, "step": 2427 }, { "ETA": 1.3, "epoch": 0.7808329313394436, "fp16_scale": 1.0, "global_step": 2428, "grad_norm": 1.9477397638022118, "learning_rate": 2.413732582964486e-07, "loss": 0.3996, "step": 2428 }, { "ETA": 1.3, "epoch": 0.781154526451198, "fp16_scale": 1.0, "global_step": 2429, "grad_norm": 1.9310566415678243, "learning_rate": 2.4069478876193014e-07, "loss": 0.4145, "step": 2429 }, { "ETA": 1.3, "epoch": 0.7814761215629522, "fp16_scale": 1.0, "global_step": 2430, "grad_norm": 1.7897591000650312, "learning_rate": 2.4001714363399973e-07, "loss": 0.4507, "step": 2430 }, { "ETA": 1.29, "epoch": 0.7817977166747065, "fp16_scale": 1.0, "global_step": 2431, "grad_norm": 1.775196010156822, "learning_rate": 2.393403236484024e-07, "loss": 0.4017, "step": 2431 }, { "ETA": 1.29, "epoch": 0.7821193117864609, "fp16_scale": 1.0, "global_step": 2432, "grad_norm": 2.0239457154136344, "learning_rate": 2.386643295399878e-07, "loss": 0.4194, "step": 2432 }, { "ETA": 1.29, "epoch": 0.7824409068982151, "fp16_scale": 1.0, "global_step": 2433, "grad_norm": 2.0350327598032734, "learning_rate": 2.3798916204270815e-07, "loss": 0.5327, "step": 2433 }, { "ETA": 1.29, "epoch": 0.7827625020099694, "fp16_scale": 1.0, "global_step": 2434, "grad_norm": 1.9191840675151848, "learning_rate": 2.3731482188961815e-07, "loss": 0.4084, "step": 2434 }, { "ETA": 1.29, "epoch": 0.7830840971217238, "fp16_scale": 1.0, "global_step": 2435, "grad_norm": 1.7817657574658297, "learning_rate": 2.3664130981287488e-07, "loss": 0.4291, "step": 2435 }, { "ETA": 1.29, "epoch": 0.783405692233478, "fp16_scale": 1.0, "global_step": 2436, "grad_norm": 1.7354952296947364, "learning_rate": 2.359686265437363e-07, "loss": 0.4298, "step": 2436 }, { "ETA": 1.28, "epoch": 0.7837272873452323, "fp16_scale": 1.0, "global_step": 2437, "grad_norm": 1.9891033243464187, "learning_rate": 2.3529677281256023e-07, "loss": 0.4185, "step": 2437 }, { "ETA": 1.28, "epoch": 0.7840488824569867, "fp16_scale": 1.0, "global_step": 2438, "grad_norm": 1.9224597958050123, "learning_rate": 2.3462574934880363e-07, "loss": 0.4331, "step": 2438 }, { "ETA": 1.28, "epoch": 0.784370477568741, "fp16_scale": 1.0, "global_step": 2439, "grad_norm": 1.933918397639351, "learning_rate": 2.339555568810221e-07, "loss": 0.3845, "step": 2439 }, { "ETA": 1.28, "epoch": 0.7846920726804952, "fp16_scale": 1.0, "global_step": 2440, "grad_norm": 2.206973782846126, "learning_rate": 2.3328619613686929e-07, "loss": 0.4741, "step": 2440 }, { "ETA": 1.28, "epoch": 0.7850136677922496, "fp16_scale": 1.0, "global_step": 2441, "grad_norm": 1.9425902063024618, "learning_rate": 2.3261766784309566e-07, "loss": 0.4425, "step": 2441 }, { "ETA": 1.27, "epoch": 0.7853352629040039, "fp16_scale": 1.0, "global_step": 2442, "grad_norm": 2.0404531728969673, "learning_rate": 2.3194997272554816e-07, "loss": 0.4983, "step": 2442 }, { "ETA": 1.27, "epoch": 0.7856568580157581, "fp16_scale": 1.0, "global_step": 2443, "grad_norm": 2.200435411586142, "learning_rate": 2.3128311150916823e-07, "loss": 0.4583, "step": 2443 }, { "ETA": 1.27, "epoch": 0.7859784531275125, "fp16_scale": 1.0, "global_step": 2444, "grad_norm": 2.319434408849042, "learning_rate": 2.3061708491799315e-07, "loss": 0.4072, "step": 2444 }, { "ETA": 1.27, "epoch": 0.7863000482392668, "fp16_scale": 1.0, "global_step": 2445, "grad_norm": 1.8877487328761051, "learning_rate": 2.299518936751529e-07, "loss": 0.3687, "step": 2445 }, { "ETA": 1.27, "epoch": 0.786621643351021, "fp16_scale": 1.0, "global_step": 2446, "grad_norm": 1.702793967365356, "learning_rate": 2.2928753850287052e-07, "loss": 0.417, "step": 2446 }, { "ETA": 1.26, "epoch": 0.7869432384627754, "fp16_scale": 1.0, "global_step": 2447, "grad_norm": 1.694650746618412, "learning_rate": 2.2862402012246274e-07, "loss": 0.379, "step": 2447 }, { "ETA": 1.26, "epoch": 0.7872648335745297, "fp16_scale": 1.0, "global_step": 2448, "grad_norm": 2.0788330389654797, "learning_rate": 2.2796133925433604e-07, "loss": 0.4545, "step": 2448 }, { "ETA": 1.26, "epoch": 0.7875864286862839, "fp16_scale": 1.0, "global_step": 2449, "grad_norm": 2.07207971611621, "learning_rate": 2.2729949661798876e-07, "loss": 0.4605, "step": 2449 }, { "ETA": 1.26, "epoch": 0.7879080237980383, "fp16_scale": 1.0, "global_step": 2450, "grad_norm": 1.9467495491281819, "learning_rate": 2.2663849293200833e-07, "loss": 0.354, "step": 2450 }, { "ETA": 1.26, "epoch": 0.7882296189097926, "fp16_scale": 1.0, "global_step": 2451, "grad_norm": 1.8780845183653034, "learning_rate": 2.259783289140713e-07, "loss": 0.3823, "step": 2451 }, { "ETA": 1.25, "epoch": 0.7885512140215468, "fp16_scale": 1.0, "global_step": 2452, "grad_norm": 1.778563629067184, "learning_rate": 2.2531900528094338e-07, "loss": 0.3928, "step": 2452 }, { "ETA": 1.25, "epoch": 0.7888728091333012, "fp16_scale": 1.0, "global_step": 2453, "grad_norm": 2.116978406408106, "learning_rate": 2.2466052274847713e-07, "loss": 0.4129, "step": 2453 }, { "ETA": 1.25, "epoch": 0.7891944042450555, "fp16_scale": 1.0, "global_step": 2454, "grad_norm": 1.7966541557113676, "learning_rate": 2.2400288203161267e-07, "loss": 0.4545, "step": 2454 }, { "ETA": 1.25, "epoch": 0.7895159993568098, "fp16_scale": 1.0, "global_step": 2455, "grad_norm": 1.6463550140147465, "learning_rate": 2.233460838443747e-07, "loss": 0.4503, "step": 2455 }, { "ETA": 1.25, "epoch": 0.789837594468564, "fp16_scale": 1.0, "global_step": 2456, "grad_norm": 2.037057951742195, "learning_rate": 2.226901288998747e-07, "loss": 0.457, "step": 2456 }, { "ETA": 1.25, "epoch": 0.7901591895803184, "fp16_scale": 1.0, "global_step": 2457, "grad_norm": 1.7667875112466476, "learning_rate": 2.2203501791030755e-07, "loss": 0.4695, "step": 2457 }, { "ETA": 1.24, "epoch": 0.7904807846920727, "fp16_scale": 1.0, "global_step": 2458, "grad_norm": 2.1244558523204042, "learning_rate": 2.2138075158695223e-07, "loss": 0.4613, "step": 2458 }, { "ETA": 1.24, "epoch": 0.790802379803827, "fp16_scale": 1.0, "global_step": 2459, "grad_norm": 1.846268770568288, "learning_rate": 2.20727330640171e-07, "loss": 0.4504, "step": 2459 }, { "ETA": 1.24, "epoch": 0.7911239749155813, "fp16_scale": 1.0, "global_step": 2460, "grad_norm": 2.3358489335845514, "learning_rate": 2.2007475577940727e-07, "loss": 0.4058, "step": 2460 }, { "ETA": 1.24, "epoch": 0.7914455700273356, "fp16_scale": 1.0, "global_step": 2461, "grad_norm": 1.8555064537306474, "learning_rate": 2.1942302771318711e-07, "loss": 0.3934, "step": 2461 }, { "ETA": 1.24, "epoch": 0.7917671651390898, "fp16_scale": 1.0, "global_step": 2462, "grad_norm": 1.9481266892670586, "learning_rate": 2.1877214714911573e-07, "loss": 0.4398, "step": 2462 }, { "ETA": 1.23, "epoch": 0.7920887602508442, "fp16_scale": 1.0, "global_step": 2463, "grad_norm": 1.9136236053642883, "learning_rate": 2.1812211479387955e-07, "loss": 0.4187, "step": 2463 }, { "ETA": 1.23, "epoch": 0.7924103553625985, "fp16_scale": 1.0, "global_step": 2464, "grad_norm": 2.0753059041092943, "learning_rate": 2.174729313532433e-07, "loss": 0.4092, "step": 2464 }, { "ETA": 1.23, "epoch": 0.7927319504743527, "fp16_scale": 1.0, "global_step": 2465, "grad_norm": 1.8422628993967345, "learning_rate": 2.1682459753204996e-07, "loss": 0.496, "step": 2465 }, { "ETA": 1.23, "epoch": 0.7930535455861071, "fp16_scale": 1.0, "global_step": 2466, "grad_norm": 1.8599436632392372, "learning_rate": 2.1617711403422067e-07, "loss": 0.438, "step": 2466 }, { "ETA": 1.23, "epoch": 0.7933751406978614, "fp16_scale": 1.0, "global_step": 2467, "grad_norm": 2.0420426155202156, "learning_rate": 2.1553048156275278e-07, "loss": 0.4036, "step": 2467 }, { "ETA": 1.22, "epoch": 0.7936967358096156, "fp16_scale": 1.0, "global_step": 2468, "grad_norm": 1.9215311888129323, "learning_rate": 2.14884700819719e-07, "loss": 0.4464, "step": 2468 }, { "ETA": 1.22, "epoch": 0.79401833092137, "fp16_scale": 1.0, "global_step": 2469, "grad_norm": 1.9252054519149342, "learning_rate": 2.1423977250626935e-07, "loss": 0.4183, "step": 2469 }, { "ETA": 1.22, "epoch": 0.7943399260331243, "fp16_scale": 1.0, "global_step": 2470, "grad_norm": 1.868769489318591, "learning_rate": 2.1359569732262616e-07, "loss": 0.4225, "step": 2470 }, { "ETA": 1.22, "epoch": 0.7946615211448786, "fp16_scale": 1.0, "global_step": 2471, "grad_norm": 1.914731506889317, "learning_rate": 2.1295247596808707e-07, "loss": 0.4664, "step": 2471 }, { "ETA": 1.22, "epoch": 0.7949831162566329, "fp16_scale": 1.0, "global_step": 2472, "grad_norm": 2.1494173384732305, "learning_rate": 2.1231010914102132e-07, "loss": 0.3298, "step": 2472 }, { "ETA": 1.21, "epoch": 0.7953047113683872, "fp16_scale": 1.0, "global_step": 2473, "grad_norm": 1.753058322961423, "learning_rate": 2.1166859753887168e-07, "loss": 0.4025, "step": 2473 }, { "ETA": 1.21, "epoch": 0.7956263064801415, "fp16_scale": 1.0, "global_step": 2474, "grad_norm": 1.987746944860503, "learning_rate": 2.1102794185815097e-07, "loss": 0.4229, "step": 2474 }, { "ETA": 1.21, "epoch": 0.7959479015918958, "fp16_scale": 1.0, "global_step": 2475, "grad_norm": 2.1355958067140475, "learning_rate": 2.1038814279444406e-07, "loss": 0.5087, "step": 2475 }, { "ETA": 1.21, "epoch": 0.7962694967036501, "fp16_scale": 1.0, "global_step": 2476, "grad_norm": 1.915840649659624, "learning_rate": 2.0974920104240524e-07, "loss": 0.3922, "step": 2476 }, { "ETA": 1.21, "epoch": 0.7965910918154044, "fp16_scale": 1.0, "global_step": 2477, "grad_norm": 1.993782877560204, "learning_rate": 2.0911111729575736e-07, "loss": 0.4834, "step": 2477 }, { "ETA": 1.21, "epoch": 0.7969126869271587, "fp16_scale": 1.0, "global_step": 2478, "grad_norm": 2.242003118407737, "learning_rate": 2.0847389224729283e-07, "loss": 0.3983, "step": 2478 }, { "ETA": 1.2, "epoch": 0.797234282038913, "fp16_scale": 1.0, "global_step": 2479, "grad_norm": 2.2093102230788406, "learning_rate": 2.0783752658887066e-07, "loss": 0.4651, "step": 2479 }, { "ETA": 1.2, "epoch": 0.7975558771506673, "fp16_scale": 1.0, "global_step": 2480, "grad_norm": 2.130014627732121, "learning_rate": 2.0720202101141748e-07, "loss": 0.5205, "step": 2480 }, { "ETA": 1.2, "epoch": 0.7978774722624216, "fp16_scale": 1.0, "global_step": 2481, "grad_norm": 2.2184590034367995, "learning_rate": 2.0656737620492627e-07, "loss": 0.429, "step": 2481 }, { "ETA": 1.2, "epoch": 0.7981990673741759, "fp16_scale": 1.0, "global_step": 2482, "grad_norm": 1.801017357698713, "learning_rate": 2.0593359285845436e-07, "loss": 0.391, "step": 2482 }, { "ETA": 1.2, "epoch": 0.7985206624859302, "fp16_scale": 1.0, "global_step": 2483, "grad_norm": 1.891796986862624, "learning_rate": 2.053006716601251e-07, "loss": 0.4492, "step": 2483 }, { "ETA": 1.19, "epoch": 0.7988422575976846, "fp16_scale": 1.0, "global_step": 2484, "grad_norm": 1.9651324001750246, "learning_rate": 2.046686132971247e-07, "loss": 0.4895, "step": 2484 }, { "ETA": 1.19, "epoch": 0.7991638527094388, "fp16_scale": 1.0, "global_step": 2485, "grad_norm": 2.0979890977550104, "learning_rate": 2.0403741845570311e-07, "loss": 0.3495, "step": 2485 }, { "ETA": 1.19, "epoch": 0.7994854478211931, "fp16_scale": 1.0, "global_step": 2486, "grad_norm": 2.0540695022360436, "learning_rate": 2.0340708782117289e-07, "loss": 0.3883, "step": 2486 }, { "ETA": 1.19, "epoch": 0.7998070429329475, "fp16_scale": 1.0, "global_step": 2487, "grad_norm": 1.957244486134892, "learning_rate": 2.027776220779076e-07, "loss": 0.4617, "step": 2487 }, { "ETA": 1.19, "epoch": 0.8001286380447017, "fp16_scale": 1.0, "global_step": 2488, "grad_norm": 1.8549920163018019, "learning_rate": 2.0214902190934259e-07, "loss": 0.401, "step": 2488 }, { "ETA": 1.18, "epoch": 0.800450233156456, "fp16_scale": 1.0, "global_step": 2489, "grad_norm": 2.012581416443408, "learning_rate": 2.0152128799797253e-07, "loss": 0.4624, "step": 2489 }, { "ETA": 1.18, "epoch": 0.8007718282682104, "fp16_scale": 1.0, "global_step": 2490, "grad_norm": 2.083048801769933, "learning_rate": 2.0089442102535238e-07, "loss": 0.4228, "step": 2490 }, { "ETA": 1.18, "epoch": 0.8010934233799646, "fp16_scale": 1.0, "global_step": 2491, "grad_norm": 2.0238387783724363, "learning_rate": 2.0026842167209557e-07, "loss": 0.4077, "step": 2491 }, { "ETA": 1.18, "epoch": 0.8014150184917189, "fp16_scale": 1.0, "global_step": 2492, "grad_norm": 1.9708344518247611, "learning_rate": 1.99643290617873e-07, "loss": 0.3877, "step": 2492 }, { "ETA": 1.18, "epoch": 0.8017366136034733, "fp16_scale": 1.0, "global_step": 2493, "grad_norm": 2.045313160170974, "learning_rate": 1.9901902854141384e-07, "loss": 0.4108, "step": 2493 }, { "ETA": 1.17, "epoch": 0.8020582087152275, "fp16_scale": 1.0, "global_step": 2494, "grad_norm": 1.7795147824502908, "learning_rate": 1.983956361205027e-07, "loss": 0.3458, "step": 2494 }, { "ETA": 1.17, "epoch": 0.8023798038269818, "fp16_scale": 1.0, "global_step": 2495, "grad_norm": 2.11604102663727, "learning_rate": 1.9777311403198084e-07, "loss": 0.4683, "step": 2495 }, { "ETA": 1.17, "epoch": 0.8027013989387362, "fp16_scale": 1.0, "global_step": 2496, "grad_norm": 1.9364654543210857, "learning_rate": 1.971514629517438e-07, "loss": 0.4086, "step": 2496 }, { "ETA": 1.17, "epoch": 0.8030229940504904, "fp16_scale": 1.0, "global_step": 2497, "grad_norm": 1.7758602896327396, "learning_rate": 1.9653068355474212e-07, "loss": 0.4368, "step": 2497 }, { "ETA": 1.17, "epoch": 0.8033445891622447, "fp16_scale": 1.0, "global_step": 2498, "grad_norm": 2.0880662850925247, "learning_rate": 1.9591077651497977e-07, "loss": 0.389, "step": 2498 }, { "ETA": 1.17, "epoch": 0.803666184273999, "fp16_scale": 1.0, "global_step": 2499, "grad_norm": 2.053422363344973, "learning_rate": 1.9529174250551306e-07, "loss": 0.4646, "step": 2499 }, { "ETA": 1.16, "epoch": 0.8039877793857534, "fp16_scale": 1.0, "global_step": 2500, "grad_norm": 1.9842140172033078, "learning_rate": 1.946735821984513e-07, "loss": 0.409, "step": 2500 }, { "ETA": 1.16, "epoch": 0.8043093744975076, "fp16_scale": 1.0, "global_step": 2501, "grad_norm": 2.2112034969132637, "learning_rate": 1.94056296264954e-07, "loss": 0.4633, "step": 2501 }, { "ETA": 1.16, "epoch": 0.804630969609262, "fp16_scale": 1.0, "global_step": 2502, "grad_norm": 1.9870326488709018, "learning_rate": 1.9343988537523236e-07, "loss": 0.3735, "step": 2502 }, { "ETA": 1.16, "epoch": 0.8049525647210163, "fp16_scale": 1.0, "global_step": 2503, "grad_norm": 1.994764445023778, "learning_rate": 1.928243501985475e-07, "loss": 0.4333, "step": 2503 }, { "ETA": 1.16, "epoch": 0.8052741598327705, "fp16_scale": 1.0, "global_step": 2504, "grad_norm": 1.9549067264880902, "learning_rate": 1.9220969140320887e-07, "loss": 0.4622, "step": 2504 }, { "ETA": 1.15, "epoch": 0.8055957549445248, "fp16_scale": 1.0, "global_step": 2505, "grad_norm": 2.040741415297998, "learning_rate": 1.9159590965657534e-07, "loss": 0.4144, "step": 2505 }, { "ETA": 1.15, "epoch": 0.8059173500562792, "fp16_scale": 1.0, "global_step": 2506, "grad_norm": 1.9184951697946278, "learning_rate": 1.9098300562505264e-07, "loss": 0.4467, "step": 2506 }, { "ETA": 1.15, "epoch": 0.8062389451680334, "fp16_scale": 1.0, "global_step": 2507, "grad_norm": 1.9242915649426138, "learning_rate": 1.9037097997409436e-07, "loss": 0.4843, "step": 2507 }, { "ETA": 1.15, "epoch": 0.8065605402797877, "fp16_scale": 1.0, "global_step": 2508, "grad_norm": 2.276806397279408, "learning_rate": 1.8975983336820022e-07, "loss": 0.4614, "step": 2508 }, { "ETA": 1.15, "epoch": 0.8068821353915421, "fp16_scale": 1.0, "global_step": 2509, "grad_norm": 2.0156629218691373, "learning_rate": 1.8914956647091497e-07, "loss": 0.4553, "step": 2509 }, { "ETA": 1.14, "epoch": 0.8072037305032963, "fp16_scale": 1.0, "global_step": 2510, "grad_norm": 2.0442056998554716, "learning_rate": 1.8854017994482908e-07, "loss": 0.4204, "step": 2510 }, { "ETA": 1.14, "epoch": 0.8075253256150506, "fp16_scale": 1.0, "global_step": 2511, "grad_norm": 2.264490983935522, "learning_rate": 1.8793167445157608e-07, "loss": 0.4189, "step": 2511 }, { "ETA": 1.14, "epoch": 0.807846920726805, "fp16_scale": 1.0, "global_step": 2512, "grad_norm": 1.9185061905528236, "learning_rate": 1.8732405065183432e-07, "loss": 0.4458, "step": 2512 }, { "ETA": 1.14, "epoch": 0.8081685158385593, "fp16_scale": 1.0, "global_step": 2513, "grad_norm": 1.8712179718797177, "learning_rate": 1.8671730920532335e-07, "loss": 0.3941, "step": 2513 }, { "ETA": 1.14, "epoch": 0.8084901109503135, "fp16_scale": 1.0, "global_step": 2514, "grad_norm": 1.7769623459440849, "learning_rate": 1.8611145077080592e-07, "loss": 0.3725, "step": 2514 }, { "ETA": 1.14, "epoch": 0.8088117060620679, "fp16_scale": 1.0, "global_step": 2515, "grad_norm": 1.7695957466228178, "learning_rate": 1.8550647600608572e-07, "loss": 0.3891, "step": 2515 }, { "ETA": 1.13, "epoch": 0.8091333011738222, "fp16_scale": 1.0, "global_step": 2516, "grad_norm": 1.9458140690971688, "learning_rate": 1.8490238556800641e-07, "loss": 0.3958, "step": 2516 }, { "ETA": 1.13, "epoch": 0.8094548962855764, "fp16_scale": 1.0, "global_step": 2517, "grad_norm": 1.9830296884981868, "learning_rate": 1.842991801124526e-07, "loss": 0.4352, "step": 2517 }, { "ETA": 1.13, "epoch": 0.8097764913973308, "fp16_scale": 1.0, "global_step": 2518, "grad_norm": 2.119460266750053, "learning_rate": 1.8369686029434673e-07, "loss": 0.4216, "step": 2518 }, { "ETA": 1.13, "epoch": 0.8100980865090851, "fp16_scale": 1.0, "global_step": 2519, "grad_norm": 2.100373649115543, "learning_rate": 1.830954267676509e-07, "loss": 0.4544, "step": 2519 }, { "ETA": 1.13, "epoch": 0.8104196816208393, "fp16_scale": 1.0, "global_step": 2520, "grad_norm": 2.1256103975037255, "learning_rate": 1.824948801853643e-07, "loss": 0.4149, "step": 2520 }, { "ETA": 1.12, "epoch": 0.8107412767325937, "fp16_scale": 1.0, "global_step": 2521, "grad_norm": 1.8692798779298303, "learning_rate": 1.8189522119952304e-07, "loss": 0.4557, "step": 2521 }, { "ETA": 1.12, "epoch": 0.811062871844348, "fp16_scale": 1.0, "global_step": 2522, "grad_norm": 2.1134773429500235, "learning_rate": 1.8129645046120002e-07, "loss": 0.3747, "step": 2522 }, { "ETA": 1.12, "epoch": 0.8113844669561022, "fp16_scale": 1.0, "global_step": 2523, "grad_norm": 1.943583459154084, "learning_rate": 1.8069856862050303e-07, "loss": 0.3523, "step": 2523 }, { "ETA": 1.12, "epoch": 0.8117060620678566, "fp16_scale": 1.0, "global_step": 2524, "grad_norm": 1.9621807831039595, "learning_rate": 1.801015763265754e-07, "loss": 0.5122, "step": 2524 }, { "ETA": 1.12, "epoch": 0.8120276571796109, "fp16_scale": 1.0, "global_step": 2525, "grad_norm": 1.8613041917834738, "learning_rate": 1.7950547422759454e-07, "loss": 0.416, "step": 2525 }, { "ETA": 1.11, "epoch": 0.8123492522913651, "fp16_scale": 1.0, "global_step": 2526, "grad_norm": 2.0614993988677512, "learning_rate": 1.7891026297077094e-07, "loss": 0.4028, "step": 2526 }, { "ETA": 1.11, "epoch": 0.8126708474031195, "fp16_scale": 1.0, "global_step": 2527, "grad_norm": 1.9527363939156346, "learning_rate": 1.7831594320234844e-07, "loss": 0.4435, "step": 2527 }, { "ETA": 1.11, "epoch": 0.8129924425148738, "fp16_scale": 1.0, "global_step": 2528, "grad_norm": 2.047131825759397, "learning_rate": 1.777225155676021e-07, "loss": 0.4773, "step": 2528 }, { "ETA": 1.11, "epoch": 0.8133140376266281, "fp16_scale": 1.0, "global_step": 2529, "grad_norm": 2.3245541025753864, "learning_rate": 1.771299807108394e-07, "loss": 0.4465, "step": 2529 }, { "ETA": 1.11, "epoch": 0.8136356327383824, "fp16_scale": 1.0, "global_step": 2530, "grad_norm": 2.209518083176055, "learning_rate": 1.7653833927539773e-07, "loss": 0.4224, "step": 2530 }, { "ETA": 1.1, "epoch": 0.8139572278501367, "fp16_scale": 1.0, "global_step": 2531, "grad_norm": 1.865852104287864, "learning_rate": 1.7594759190364517e-07, "loss": 0.4575, "step": 2531 }, { "ETA": 1.1, "epoch": 0.814278822961891, "fp16_scale": 1.0, "global_step": 2532, "grad_norm": 2.3157858690210467, "learning_rate": 1.7535773923697828e-07, "loss": 0.464, "step": 2532 }, { "ETA": 1.1, "epoch": 0.8146004180736452, "fp16_scale": 1.0, "global_step": 2533, "grad_norm": 1.8916738019343446, "learning_rate": 1.7476878191582245e-07, "loss": 0.4532, "step": 2533 }, { "ETA": 1.1, "epoch": 0.8149220131853996, "fp16_scale": 1.0, "global_step": 2534, "grad_norm": 2.141556612925703, "learning_rate": 1.741807205796314e-07, "loss": 0.4473, "step": 2534 }, { "ETA": 1.1, "epoch": 0.8152436082971539, "fp16_scale": 1.0, "global_step": 2535, "grad_norm": 1.8035567061387987, "learning_rate": 1.7359355586688506e-07, "loss": 0.4375, "step": 2535 }, { "ETA": 1.09, "epoch": 0.8155652034089081, "fp16_scale": 1.0, "global_step": 2536, "grad_norm": 1.8545618445407024, "learning_rate": 1.7300728841509161e-07, "loss": 0.3985, "step": 2536 }, { "ETA": 1.09, "epoch": 0.8158867985206625, "fp16_scale": 1.0, "global_step": 2537, "grad_norm": 1.9846775897391127, "learning_rate": 1.7242191886078328e-07, "loss": 0.4467, "step": 2537 }, { "ETA": 1.09, "epoch": 0.8162083936324168, "fp16_scale": 1.0, "global_step": 2538, "grad_norm": 2.013589406256994, "learning_rate": 1.7183744783951792e-07, "loss": 0.4546, "step": 2538 }, { "ETA": 1.09, "epoch": 0.816529988744171, "fp16_scale": 1.0, "global_step": 2539, "grad_norm": 2.1691878399885316, "learning_rate": 1.712538759858786e-07, "loss": 0.4676, "step": 2539 }, { "ETA": 1.09, "epoch": 0.8168515838559254, "fp16_scale": 1.0, "global_step": 2540, "grad_norm": 2.1063781504713495, "learning_rate": 1.7067120393347078e-07, "loss": 0.4208, "step": 2540 }, { "ETA": 1.09, "epoch": 0.8171731789676797, "fp16_scale": 1.0, "global_step": 2541, "grad_norm": 1.8546166854224735, "learning_rate": 1.700894323149241e-07, "loss": 0.4607, "step": 2541 }, { "ETA": 1.08, "epoch": 0.8174947740794339, "fp16_scale": 1.0, "global_step": 2542, "grad_norm": 1.9700426747155693, "learning_rate": 1.6950856176189032e-07, "loss": 0.347, "step": 2542 }, { "ETA": 1.08, "epoch": 0.8178163691911883, "fp16_scale": 1.0, "global_step": 2543, "grad_norm": 1.8289973718165615, "learning_rate": 1.6892859290504236e-07, "loss": 0.4975, "step": 2543 }, { "ETA": 1.08, "epoch": 0.8181379643029426, "fp16_scale": 1.0, "global_step": 2544, "grad_norm": 1.9524794021852336, "learning_rate": 1.6834952637407484e-07, "loss": 0.3824, "step": 2544 }, { "ETA": 1.08, "epoch": 0.818459559414697, "fp16_scale": 1.0, "global_step": 2545, "grad_norm": 2.019513824256391, "learning_rate": 1.6777136279770198e-07, "loss": 0.3291, "step": 2545 }, { "ETA": 1.08, "epoch": 0.8187811545264512, "fp16_scale": 1.0, "global_step": 2546, "grad_norm": 1.9393061584107576, "learning_rate": 1.671941028036582e-07, "loss": 0.4279, "step": 2546 }, { "ETA": 1.07, "epoch": 0.8191027496382055, "fp16_scale": 1.0, "global_step": 2547, "grad_norm": 1.9017947170826133, "learning_rate": 1.666177470186967e-07, "loss": 0.382, "step": 2547 }, { "ETA": 1.07, "epoch": 0.8194243447499598, "fp16_scale": 1.0, "global_step": 2548, "grad_norm": 2.101805217901928, "learning_rate": 1.6604229606858898e-07, "loss": 0.4719, "step": 2548 }, { "ETA": 1.07, "epoch": 0.8197459398617141, "fp16_scale": 1.0, "global_step": 2549, "grad_norm": 1.889196482177124, "learning_rate": 1.65467750578124e-07, "loss": 0.4763, "step": 2549 }, { "ETA": 1.07, "epoch": 0.8200675349734684, "fp16_scale": 1.0, "global_step": 2550, "grad_norm": 1.9490867359181265, "learning_rate": 1.648941111711073e-07, "loss": 0.4549, "step": 2550 }, { "ETA": 1.07, "epoch": 0.8203891300852227, "fp16_scale": 1.0, "global_step": 2551, "grad_norm": 1.776545407263281, "learning_rate": 1.6432137847036142e-07, "loss": 0.4146, "step": 2551 }, { "ETA": 1.06, "epoch": 0.820710725196977, "fp16_scale": 1.0, "global_step": 2552, "grad_norm": 1.9405113251265456, "learning_rate": 1.6374955309772408e-07, "loss": 0.4236, "step": 2552 }, { "ETA": 1.06, "epoch": 0.8210323203087313, "fp16_scale": 1.0, "global_step": 2553, "grad_norm": 2.1506190989888707, "learning_rate": 1.631786356740479e-07, "loss": 0.43, "step": 2553 }, { "ETA": 1.06, "epoch": 0.8213539154204856, "fp16_scale": 1.0, "global_step": 2554, "grad_norm": 1.9605395718845724, "learning_rate": 1.6260862681919962e-07, "loss": 0.3987, "step": 2554 }, { "ETA": 1.06, "epoch": 0.8216755105322399, "fp16_scale": 1.0, "global_step": 2555, "grad_norm": 1.9439136460842648, "learning_rate": 1.6203952715205916e-07, "loss": 0.4473, "step": 2555 }, { "ETA": 1.06, "epoch": 0.8219971056439942, "fp16_scale": 1.0, "global_step": 2556, "grad_norm": 1.8452912797379448, "learning_rate": 1.6147133729052042e-07, "loss": 0.4235, "step": 2556 }, { "ETA": 1.05, "epoch": 0.8223187007557485, "fp16_scale": 1.0, "global_step": 2557, "grad_norm": 1.9669060855358702, "learning_rate": 1.6090405785148786e-07, "loss": 0.4801, "step": 2557 }, { "ETA": 1.05, "epoch": 0.8226402958675029, "fp16_scale": 1.0, "global_step": 2558, "grad_norm": 1.9688328574724991, "learning_rate": 1.6033768945087934e-07, "loss": 0.4557, "step": 2558 }, { "ETA": 1.05, "epoch": 0.8229618909792571, "fp16_scale": 1.0, "global_step": 2559, "grad_norm": 1.937784146822814, "learning_rate": 1.5977223270362194e-07, "loss": 0.4149, "step": 2559 }, { "ETA": 1.05, "epoch": 0.8232834860910114, "fp16_scale": 1.0, "global_step": 2560, "grad_norm": 1.8457875986594938, "learning_rate": 1.5920768822365416e-07, "loss": 0.4222, "step": 2560 }, { "ETA": 1.05, "epoch": 0.8236050812027658, "fp16_scale": 1.0, "global_step": 2561, "grad_norm": 2.1278834935103, "learning_rate": 1.58644056623923e-07, "loss": 0.3592, "step": 2561 }, { "ETA": 1.05, "epoch": 0.82392667631452, "fp16_scale": 1.0, "global_step": 2562, "grad_norm": 1.9791492685702856, "learning_rate": 1.5808133851638472e-07, "loss": 0.4564, "step": 2562 }, { "ETA": 1.04, "epoch": 0.8242482714262743, "fp16_scale": 1.0, "global_step": 2563, "grad_norm": 1.923078974095852, "learning_rate": 1.5751953451200384e-07, "loss": 0.3662, "step": 2563 }, { "ETA": 1.04, "epoch": 0.8245698665380287, "fp16_scale": 1.0, "global_step": 2564, "grad_norm": 2.2040797032388064, "learning_rate": 1.5695864522075254e-07, "loss": 0.3789, "step": 2564 }, { "ETA": 1.04, "epoch": 0.8248914616497829, "fp16_scale": 1.0, "global_step": 2565, "grad_norm": 1.8952652841970785, "learning_rate": 1.563986712516099e-07, "loss": 0.4193, "step": 2565 }, { "ETA": 1.04, "epoch": 0.8252130567615372, "fp16_scale": 1.0, "global_step": 2566, "grad_norm": 1.9349291792067729, "learning_rate": 1.5583961321256056e-07, "loss": 0.419, "step": 2566 }, { "ETA": 1.04, "epoch": 0.8255346518732916, "fp16_scale": 1.0, "global_step": 2567, "grad_norm": 2.2330237281409895, "learning_rate": 1.5528147171059514e-07, "loss": 0.4592, "step": 2567 }, { "ETA": 1.03, "epoch": 0.8258562469850458, "fp16_scale": 1.0, "global_step": 2568, "grad_norm": 2.0969844108060323, "learning_rate": 1.547242473517092e-07, "loss": 0.4139, "step": 2568 }, { "ETA": 1.03, "epoch": 0.8261778420968001, "fp16_scale": 1.0, "global_step": 2569, "grad_norm": 2.0395160920440794, "learning_rate": 1.5416794074090255e-07, "loss": 0.328, "step": 2569 }, { "ETA": 1.03, "epoch": 0.8264994372085545, "fp16_scale": 1.0, "global_step": 2570, "grad_norm": 2.068855299846136, "learning_rate": 1.5361255248217864e-07, "loss": 0.4236, "step": 2570 }, { "ETA": 1.03, "epoch": 0.8268210323203087, "fp16_scale": 1.0, "global_step": 2571, "grad_norm": 2.210493605981183, "learning_rate": 1.530580831785434e-07, "loss": 0.5278, "step": 2571 }, { "ETA": 1.03, "epoch": 0.827142627432063, "fp16_scale": 1.0, "global_step": 2572, "grad_norm": 2.020852969906599, "learning_rate": 1.525045334320051e-07, "loss": 0.4251, "step": 2572 }, { "ETA": 1.02, "epoch": 0.8274642225438174, "fp16_scale": 1.0, "global_step": 2573, "grad_norm": 2.2499177103886714, "learning_rate": 1.5195190384357404e-07, "loss": 0.4723, "step": 2573 }, { "ETA": 1.02, "epoch": 0.8277858176555717, "fp16_scale": 1.0, "global_step": 2574, "grad_norm": 1.9345280398813554, "learning_rate": 1.5140019501326108e-07, "loss": 0.4186, "step": 2574 }, { "ETA": 1.02, "epoch": 0.8281074127673259, "fp16_scale": 1.0, "global_step": 2575, "grad_norm": 2.0608889214705353, "learning_rate": 1.5084940754007792e-07, "loss": 0.3819, "step": 2575 }, { "ETA": 1.02, "epoch": 0.8284290078790802, "fp16_scale": 1.0, "global_step": 2576, "grad_norm": 2.0624830140043606, "learning_rate": 1.5029954202203487e-07, "loss": 0.408, "step": 2576 }, { "ETA": 1.02, "epoch": 0.8287506029908346, "fp16_scale": 1.0, "global_step": 2577, "grad_norm": 2.0575361441388993, "learning_rate": 1.497505990561424e-07, "loss": 0.4474, "step": 2577 }, { "ETA": 1.01, "epoch": 0.8290721981025888, "fp16_scale": 1.0, "global_step": 2578, "grad_norm": 1.9315086577025948, "learning_rate": 1.4920257923840862e-07, "loss": 0.4437, "step": 2578 }, { "ETA": 1.01, "epoch": 0.8293937932143431, "fp16_scale": 1.0, "global_step": 2579, "grad_norm": 2.1467622948579743, "learning_rate": 1.4865548316383892e-07, "loss": 0.3698, "step": 2579 }, { "ETA": 1.01, "epoch": 0.8297153883260975, "fp16_scale": 1.0, "global_step": 2580, "grad_norm": 1.9042671934926998, "learning_rate": 1.4810931142643734e-07, "loss": 0.4359, "step": 2580 }, { "ETA": 1.01, "epoch": 0.8300369834378517, "fp16_scale": 1.0, "global_step": 2581, "grad_norm": 2.111438866818042, "learning_rate": 1.475640646192028e-07, "loss": 0.4663, "step": 2581 }, { "ETA": 1.01, "epoch": 0.830358578549606, "fp16_scale": 1.0, "global_step": 2582, "grad_norm": 1.850302035062646, "learning_rate": 1.470197433341307e-07, "loss": 0.3967, "step": 2582 }, { "ETA": 1.0, "epoch": 0.8306801736613604, "fp16_scale": 1.0, "global_step": 2583, "grad_norm": 2.000143436859863, "learning_rate": 1.4647634816221132e-07, "loss": 0.4992, "step": 2583 }, { "ETA": 1.0, "epoch": 0.8310017687731146, "fp16_scale": 1.0, "global_step": 2584, "grad_norm": 1.7734052413804926, "learning_rate": 1.459338796934293e-07, "loss": 0.4448, "step": 2584 }, { "ETA": 1.0, "epoch": 0.8313233638848689, "fp16_scale": 1.0, "global_step": 2585, "grad_norm": 1.9917405909212338, "learning_rate": 1.4539233851676346e-07, "loss": 0.4446, "step": 2585 }, { "ETA": 1.0, "epoch": 0.8316449589966233, "fp16_scale": 1.0, "global_step": 2586, "grad_norm": 1.9828738133796069, "learning_rate": 1.4485172522018573e-07, "loss": 0.3661, "step": 2586 }, { "ETA": 1.0, "epoch": 0.8319665541083775, "fp16_scale": 1.0, "global_step": 2587, "grad_norm": 1.8872427807549932, "learning_rate": 1.443120403906608e-07, "loss": 0.388, "step": 2587 }, { "ETA": 0.99, "epoch": 0.8322881492201318, "fp16_scale": 1.0, "global_step": 2588, "grad_norm": 1.8174817377215846, "learning_rate": 1.4377328461414462e-07, "loss": 0.3715, "step": 2588 }, { "ETA": 0.99, "epoch": 0.8326097443318862, "fp16_scale": 1.0, "global_step": 2589, "grad_norm": 2.1715968227378024, "learning_rate": 1.4323545847558517e-07, "loss": 0.4259, "step": 2589 }, { "ETA": 0.99, "epoch": 0.8329313394436405, "fp16_scale": 1.0, "global_step": 2590, "grad_norm": 1.9014250535210901, "learning_rate": 1.4269856255892033e-07, "loss": 0.3336, "step": 2590 }, { "ETA": 0.99, "epoch": 0.8332529345553947, "fp16_scale": 1.0, "global_step": 2591, "grad_norm": 1.9357048316498084, "learning_rate": 1.421625974470788e-07, "loss": 0.4073, "step": 2591 }, { "ETA": 0.99, "epoch": 0.8335745296671491, "fp16_scale": 1.0, "global_step": 2592, "grad_norm": 2.1649140880066517, "learning_rate": 1.416275637219786e-07, "loss": 0.4152, "step": 2592 }, { "ETA": 0.99, "epoch": 0.8338961247789034, "fp16_scale": 1.0, "global_step": 2593, "grad_norm": 1.8427275663772384, "learning_rate": 1.4109346196452553e-07, "loss": 0.4478, "step": 2593 }, { "ETA": 0.98, "epoch": 0.8342177198906576, "fp16_scale": 1.0, "global_step": 2594, "grad_norm": 1.7796464256515407, "learning_rate": 1.4056029275461478e-07, "loss": 0.4066, "step": 2594 }, { "ETA": 0.98, "epoch": 0.834539315002412, "fp16_scale": 1.0, "global_step": 2595, "grad_norm": 1.9569002544326037, "learning_rate": 1.4002805667112817e-07, "loss": 0.4346, "step": 2595 }, { "ETA": 0.98, "epoch": 0.8348609101141663, "fp16_scale": 1.0, "global_step": 2596, "grad_norm": 2.178954877658416, "learning_rate": 1.3949675429193465e-07, "loss": 0.4346, "step": 2596 }, { "ETA": 0.98, "epoch": 0.8351825052259205, "fp16_scale": 1.0, "global_step": 2597, "grad_norm": 2.083800666836877, "learning_rate": 1.3896638619388978e-07, "loss": 0.3998, "step": 2597 }, { "ETA": 0.98, "epoch": 0.8355041003376749, "fp16_scale": 1.0, "global_step": 2598, "grad_norm": 1.98421065941732, "learning_rate": 1.3843695295283408e-07, "loss": 0.3668, "step": 2598 }, { "ETA": 0.97, "epoch": 0.8358256954494292, "fp16_scale": 1.0, "global_step": 2599, "grad_norm": 1.9859012175962345, "learning_rate": 1.379084551435936e-07, "loss": 0.4887, "step": 2599 }, { "ETA": 0.97, "epoch": 0.8361472905611834, "fp16_scale": 1.0, "global_step": 2600, "grad_norm": 1.9265365356699595, "learning_rate": 1.373808933399785e-07, "loss": 0.4329, "step": 2600 }, { "ETA": 0.97, "epoch": 0.8364688856729378, "fp16_scale": 1.0, "global_step": 2601, "grad_norm": 1.9302765311369212, "learning_rate": 1.368542681147824e-07, "loss": 0.3832, "step": 2601 }, { "ETA": 0.97, "epoch": 0.8367904807846921, "fp16_scale": 1.0, "global_step": 2602, "grad_norm": 2.0436185754092717, "learning_rate": 1.3632858003978264e-07, "loss": 0.44, "step": 2602 }, { "ETA": 0.97, "epoch": 0.8371120758964464, "fp16_scale": 1.0, "global_step": 2603, "grad_norm": 2.0618549069106717, "learning_rate": 1.358038296857389e-07, "loss": 0.4329, "step": 2603 }, { "ETA": 0.97, "epoch": 0.8374336710082007, "fp16_scale": 1.0, "global_step": 2604, "grad_norm": 1.9472952381372493, "learning_rate": 1.352800176223926e-07, "loss": 0.4845, "step": 2604 }, { "ETA": 0.96, "epoch": 0.837755266119955, "fp16_scale": 1.0, "global_step": 2605, "grad_norm": 1.8453708180134525, "learning_rate": 1.3475714441846608e-07, "loss": 0.4239, "step": 2605 }, { "ETA": 0.96, "epoch": 0.8380768612317093, "fp16_scale": 1.0, "global_step": 2606, "grad_norm": 1.8924860485851713, "learning_rate": 1.3423521064166333e-07, "loss": 0.4157, "step": 2606 }, { "ETA": 0.96, "epoch": 0.8383984563434635, "fp16_scale": 1.0, "global_step": 2607, "grad_norm": 1.9001377997466726, "learning_rate": 1.3371421685866702e-07, "loss": 0.3487, "step": 2607 }, { "ETA": 0.96, "epoch": 0.8387200514552179, "fp16_scale": 1.0, "global_step": 2608, "grad_norm": 2.068520573315801, "learning_rate": 1.3319416363514025e-07, "loss": 0.4534, "step": 2608 }, { "ETA": 0.96, "epoch": 0.8390416465669722, "fp16_scale": 1.0, "global_step": 2609, "grad_norm": 2.1990426728773427, "learning_rate": 1.32675051535725e-07, "loss": 0.36, "step": 2609 }, { "ETA": 0.95, "epoch": 0.8393632416787264, "fp16_scale": 1.0, "global_step": 2610, "grad_norm": 1.8830612508120763, "learning_rate": 1.3215688112404043e-07, "loss": 0.4514, "step": 2610 }, { "ETA": 0.95, "epoch": 0.8396848367904808, "fp16_scale": 1.0, "global_step": 2611, "grad_norm": 1.9048686120765927, "learning_rate": 1.316396529626843e-07, "loss": 0.4104, "step": 2611 }, { "ETA": 0.95, "epoch": 0.8400064319022351, "fp16_scale": 1.0, "global_step": 2612, "grad_norm": 1.9535023774292661, "learning_rate": 1.311233676132306e-07, "loss": 0.3632, "step": 2612 }, { "ETA": 0.95, "epoch": 0.8403280270139893, "fp16_scale": 1.0, "global_step": 2613, "grad_norm": 1.9993295777536153, "learning_rate": 1.306080256362302e-07, "loss": 0.4098, "step": 2613 }, { "ETA": 0.95, "epoch": 0.8406496221257437, "fp16_scale": 1.0, "global_step": 2614, "grad_norm": 1.9916304814324868, "learning_rate": 1.3009362759120978e-07, "loss": 0.4332, "step": 2614 }, { "ETA": 0.94, "epoch": 0.840971217237498, "fp16_scale": 1.0, "global_step": 2615, "grad_norm": 1.9272510478037563, "learning_rate": 1.2958017403667033e-07, "loss": 0.3614, "step": 2615 }, { "ETA": 0.94, "epoch": 0.8412928123492522, "fp16_scale": 1.0, "global_step": 2616, "grad_norm": 2.10425792824437, "learning_rate": 1.2906766553008842e-07, "loss": 0.341, "step": 2616 }, { "ETA": 0.94, "epoch": 0.8416144074610066, "fp16_scale": 1.0, "global_step": 2617, "grad_norm": 1.9511163169652415, "learning_rate": 1.285561026279136e-07, "loss": 0.4548, "step": 2617 }, { "ETA": 0.94, "epoch": 0.8419360025727609, "fp16_scale": 1.0, "global_step": 2618, "grad_norm": 2.0474974091401017, "learning_rate": 1.280454858855694e-07, "loss": 0.3889, "step": 2618 }, { "ETA": 0.94, "epoch": 0.8422575976845152, "fp16_scale": 1.0, "global_step": 2619, "grad_norm": 3.18643441734776, "learning_rate": 1.2753581585745222e-07, "loss": 0.4233, "step": 2619 }, { "ETA": 0.94, "epoch": 0.8425791927962695, "fp16_scale": 1.0, "global_step": 2620, "grad_norm": 1.8786996363129471, "learning_rate": 1.2702709309692962e-07, "loss": 0.4216, "step": 2620 }, { "ETA": 0.93, "epoch": 0.8429007879080238, "fp16_scale": 1.0, "global_step": 2621, "grad_norm": 1.956621262661703, "learning_rate": 1.2651931815634175e-07, "loss": 0.4248, "step": 2621 }, { "ETA": 0.93, "epoch": 0.8432223830197781, "fp16_scale": 1.0, "global_step": 2622, "grad_norm": 1.8212862537387442, "learning_rate": 1.260124915869988e-07, "loss": 0.4636, "step": 2622 }, { "ETA": 0.93, "epoch": 0.8435439781315324, "fp16_scale": 1.0, "global_step": 2623, "grad_norm": 1.7661124921171842, "learning_rate": 1.2550661393918215e-07, "loss": 0.3898, "step": 2623 }, { "ETA": 0.93, "epoch": 0.8438655732432867, "fp16_scale": 1.0, "global_step": 2624, "grad_norm": 1.955439051149376, "learning_rate": 1.2500168576214197e-07, "loss": 0.4064, "step": 2624 }, { "ETA": 0.93, "epoch": 0.844187168355041, "fp16_scale": 1.0, "global_step": 2625, "grad_norm": 1.96149002647226, "learning_rate": 1.2449770760409816e-07, "loss": 0.4436, "step": 2625 }, { "ETA": 0.92, "epoch": 0.8445087634667953, "fp16_scale": 1.0, "global_step": 2626, "grad_norm": 2.0730734735820278, "learning_rate": 1.2399468001223933e-07, "loss": 0.3844, "step": 2626 }, { "ETA": 0.92, "epoch": 0.8448303585785496, "fp16_scale": 1.0, "global_step": 2627, "grad_norm": 1.697317809477219, "learning_rate": 1.2349260353272117e-07, "loss": 0.4151, "step": 2627 }, { "ETA": 0.92, "epoch": 0.8451519536903039, "fp16_scale": 1.0, "global_step": 2628, "grad_norm": 2.0335135160703293, "learning_rate": 1.2299147871066772e-07, "loss": 0.4135, "step": 2628 }, { "ETA": 0.92, "epoch": 0.8454735488020582, "fp16_scale": 1.0, "global_step": 2629, "grad_norm": 1.9677654311782051, "learning_rate": 1.2249130609016878e-07, "loss": 0.3195, "step": 2629 }, { "ETA": 0.92, "epoch": 0.8457951439138125, "fp16_scale": 1.0, "global_step": 2630, "grad_norm": 1.9458438484963143, "learning_rate": 1.2199208621428114e-07, "loss": 0.4446, "step": 2630 }, { "ETA": 0.91, "epoch": 0.8461167390255668, "fp16_scale": 1.0, "global_step": 2631, "grad_norm": 1.9836968497409233, "learning_rate": 1.2149381962502704e-07, "loss": 0.4003, "step": 2631 }, { "ETA": 0.91, "epoch": 0.8464383341373211, "fp16_scale": 1.0, "global_step": 2632, "grad_norm": 1.939942071471052, "learning_rate": 1.2099650686339303e-07, "loss": 0.4848, "step": 2632 }, { "ETA": 0.91, "epoch": 0.8467599292490754, "fp16_scale": 1.0, "global_step": 2633, "grad_norm": 1.9449697207489793, "learning_rate": 1.2050014846933088e-07, "loss": 0.3906, "step": 2633 }, { "ETA": 0.91, "epoch": 0.8470815243608297, "fp16_scale": 1.0, "global_step": 2634, "grad_norm": 2.130988635225944, "learning_rate": 1.200047449817555e-07, "loss": 0.4357, "step": 2634 }, { "ETA": 0.91, "epoch": 0.8474031194725841, "fp16_scale": 1.0, "global_step": 2635, "grad_norm": 1.9468733306836092, "learning_rate": 1.195102969385454e-07, "loss": 0.4015, "step": 2635 }, { "ETA": 0.9, "epoch": 0.8477247145843383, "fp16_scale": 1.0, "global_step": 2636, "grad_norm": 1.874569127546959, "learning_rate": 1.1901680487654198e-07, "loss": 0.3889, "step": 2636 }, { "ETA": 0.9, "epoch": 0.8480463096960926, "fp16_scale": 1.0, "global_step": 2637, "grad_norm": 2.015964467356171, "learning_rate": 1.185242693315479e-07, "loss": 0.4379, "step": 2637 }, { "ETA": 0.9, "epoch": 0.848367904807847, "fp16_scale": 1.0, "global_step": 2638, "grad_norm": 2.09912297413883, "learning_rate": 1.1803269083832812e-07, "loss": 0.391, "step": 2638 }, { "ETA": 0.9, "epoch": 0.8486894999196012, "fp16_scale": 1.0, "global_step": 2639, "grad_norm": 2.099103717179329, "learning_rate": 1.175420699306079e-07, "loss": 0.4343, "step": 2639 }, { "ETA": 0.9, "epoch": 0.8490110950313555, "fp16_scale": 1.0, "global_step": 2640, "grad_norm": 1.9964999767032128, "learning_rate": 1.1705240714107301e-07, "loss": 0.3826, "step": 2640 }, { "ETA": 0.9, "epoch": 0.8493326901431099, "fp16_scale": 1.0, "global_step": 2641, "grad_norm": 1.9542936081754776, "learning_rate": 1.1656370300136942e-07, "loss": 0.3872, "step": 2641 }, { "ETA": 0.89, "epoch": 0.8496542852548641, "fp16_scale": 1.0, "global_step": 2642, "grad_norm": 2.152354652101079, "learning_rate": 1.1607595804210124e-07, "loss": 0.5051, "step": 2642 }, { "ETA": 0.89, "epoch": 0.8499758803666184, "fp16_scale": 1.0, "global_step": 2643, "grad_norm": 2.0311135244125484, "learning_rate": 1.1558917279283231e-07, "loss": 0.4417, "step": 2643 }, { "ETA": 0.89, "epoch": 0.8502974754783728, "fp16_scale": 1.0, "global_step": 2644, "grad_norm": 2.1813857236598975, "learning_rate": 1.151033477820833e-07, "loss": 0.4612, "step": 2644 }, { "ETA": 0.89, "epoch": 0.850619070590127, "fp16_scale": 1.0, "global_step": 2645, "grad_norm": 2.149405263063172, "learning_rate": 1.1461848353733361e-07, "loss": 0.4194, "step": 2645 }, { "ETA": 0.89, "epoch": 0.8509406657018813, "fp16_scale": 1.0, "global_step": 2646, "grad_norm": 2.2238454576920534, "learning_rate": 1.1413458058501802e-07, "loss": 0.4094, "step": 2646 }, { "ETA": 0.88, "epoch": 0.8512622608136357, "fp16_scale": 1.0, "global_step": 2647, "grad_norm": 1.8842189951772699, "learning_rate": 1.1365163945052925e-07, "loss": 0.3989, "step": 2647 }, { "ETA": 0.88, "epoch": 0.85158385592539, "fp16_scale": 1.0, "global_step": 2648, "grad_norm": 1.8363827348750568, "learning_rate": 1.1316966065821454e-07, "loss": 0.4368, "step": 2648 }, { "ETA": 0.88, "epoch": 0.8519054510371442, "fp16_scale": 1.0, "global_step": 2649, "grad_norm": 1.9344762430833913, "learning_rate": 1.1268864473137629e-07, "loss": 0.4235, "step": 2649 }, { "ETA": 0.88, "epoch": 0.8522270461488985, "fp16_scale": 1.0, "global_step": 2650, "grad_norm": 2.07811357918809, "learning_rate": 1.122085921922723e-07, "loss": 0.5248, "step": 2650 }, { "ETA": 0.88, "epoch": 0.8525486412606529, "fp16_scale": 1.0, "global_step": 2651, "grad_norm": 1.8786943966368947, "learning_rate": 1.1172950356211353e-07, "loss": 0.469, "step": 2651 }, { "ETA": 0.87, "epoch": 0.8528702363724071, "fp16_scale": 1.0, "global_step": 2652, "grad_norm": 1.8848309361301367, "learning_rate": 1.1125137936106487e-07, "loss": 0.4202, "step": 2652 }, { "ETA": 0.87, "epoch": 0.8531918314841614, "fp16_scale": 1.0, "global_step": 2653, "grad_norm": 1.974186893266101, "learning_rate": 1.1077422010824422e-07, "loss": 0.4633, "step": 2653 }, { "ETA": 0.87, "epoch": 0.8535134265959158, "fp16_scale": 1.0, "global_step": 2654, "grad_norm": 2.0723784802037435, "learning_rate": 1.1029802632172114e-07, "loss": 0.4298, "step": 2654 }, { "ETA": 0.87, "epoch": 0.85383502170767, "fp16_scale": 1.0, "global_step": 2655, "grad_norm": 2.0158722543798717, "learning_rate": 1.0982279851851773e-07, "loss": 0.4086, "step": 2655 }, { "ETA": 0.87, "epoch": 0.8541566168194243, "fp16_scale": 1.0, "global_step": 2656, "grad_norm": 2.017577807401529, "learning_rate": 1.0934853721460669e-07, "loss": 0.5093, "step": 2656 }, { "ETA": 0.87, "epoch": 0.8544782119311787, "fp16_scale": 1.0, "global_step": 2657, "grad_norm": 2.142932809761941, "learning_rate": 1.0887524292491146e-07, "loss": 0.4552, "step": 2657 }, { "ETA": 0.86, "epoch": 0.8547998070429329, "fp16_scale": 1.0, "global_step": 2658, "grad_norm": 2.255076092998724, "learning_rate": 1.0840291616330621e-07, "loss": 0.4351, "step": 2658 }, { "ETA": 0.86, "epoch": 0.8551214021546872, "fp16_scale": 1.0, "global_step": 2659, "grad_norm": 2.0571021381863024, "learning_rate": 1.079315574426135e-07, "loss": 0.4973, "step": 2659 }, { "ETA": 0.86, "epoch": 0.8554429972664416, "fp16_scale": 1.0, "global_step": 2660, "grad_norm": 2.0775891851057935, "learning_rate": 1.0746116727460585e-07, "loss": 0.4605, "step": 2660 }, { "ETA": 0.86, "epoch": 0.8557645923781958, "fp16_scale": 1.0, "global_step": 2661, "grad_norm": 2.12686010950699, "learning_rate": 1.0699174617000351e-07, "loss": 0.3744, "step": 2661 }, { "ETA": 0.86, "epoch": 0.8560861874899501, "fp16_scale": 1.0, "global_step": 2662, "grad_norm": 1.9566894590483266, "learning_rate": 1.0652329463847497e-07, "loss": 0.3784, "step": 2662 }, { "ETA": 0.85, "epoch": 0.8564077826017045, "fp16_scale": 1.0, "global_step": 2663, "grad_norm": 1.8731186442283223, "learning_rate": 1.0605581318863576e-07, "loss": 0.3945, "step": 2663 }, { "ETA": 0.85, "epoch": 0.8567293777134588, "fp16_scale": 1.0, "global_step": 2664, "grad_norm": 1.866417490280438, "learning_rate": 1.0558930232804874e-07, "loss": 0.4314, "step": 2664 }, { "ETA": 0.85, "epoch": 0.857050972825213, "fp16_scale": 1.0, "global_step": 2665, "grad_norm": 1.9607139553663306, "learning_rate": 1.0512376256322231e-07, "loss": 0.4226, "step": 2665 }, { "ETA": 0.85, "epoch": 0.8573725679369674, "fp16_scale": 1.0, "global_step": 2666, "grad_norm": 2.044435407805264, "learning_rate": 1.0465919439961024e-07, "loss": 0.4644, "step": 2666 }, { "ETA": 0.85, "epoch": 0.8576941630487217, "fp16_scale": 1.0, "global_step": 2667, "grad_norm": 2.2725526784619823, "learning_rate": 1.0419559834161262e-07, "loss": 0.4162, "step": 2667 }, { "ETA": 0.84, "epoch": 0.8580157581604759, "fp16_scale": 1.0, "global_step": 2668, "grad_norm": 2.093071455508944, "learning_rate": 1.0373297489257271e-07, "loss": 0.4536, "step": 2668 }, { "ETA": 0.84, "epoch": 0.8583373532722303, "fp16_scale": 1.0, "global_step": 2669, "grad_norm": 2.008772185369336, "learning_rate": 1.0327132455477872e-07, "loss": 0.4453, "step": 2669 }, { "ETA": 0.84, "epoch": 0.8586589483839846, "fp16_scale": 1.0, "global_step": 2670, "grad_norm": 2.178045069885504, "learning_rate": 1.0281064782946213e-07, "loss": 0.4281, "step": 2670 }, { "ETA": 0.84, "epoch": 0.8589805434957388, "fp16_scale": 1.0, "global_step": 2671, "grad_norm": 1.7575765820723166, "learning_rate": 1.0235094521679688e-07, "loss": 0.3138, "step": 2671 }, { "ETA": 0.84, "epoch": 0.8593021386074932, "fp16_scale": 1.0, "global_step": 2672, "grad_norm": 1.800148660908802, "learning_rate": 1.0189221721590002e-07, "loss": 0.3898, "step": 2672 }, { "ETA": 0.83, "epoch": 0.8596237337192475, "fp16_scale": 1.0, "global_step": 2673, "grad_norm": 1.8282739859272057, "learning_rate": 1.014344643248295e-07, "loss": 0.4007, "step": 2673 }, { "ETA": 0.83, "epoch": 0.8599453288310017, "fp16_scale": 1.0, "global_step": 2674, "grad_norm": 1.9206677472736489, "learning_rate": 1.0097768704058541e-07, "loss": 0.3872, "step": 2674 }, { "ETA": 0.83, "epoch": 0.8602669239427561, "fp16_scale": 1.0, "global_step": 2675, "grad_norm": 1.921346141580359, "learning_rate": 1.0052188585910837e-07, "loss": 0.4243, "step": 2675 }, { "ETA": 0.83, "epoch": 0.8605885190545104, "fp16_scale": 1.0, "global_step": 2676, "grad_norm": 2.109667789704599, "learning_rate": 1.0006706127527864e-07, "loss": 0.3659, "step": 2676 }, { "ETA": 0.83, "epoch": 0.8609101141662646, "fp16_scale": 1.0, "global_step": 2677, "grad_norm": 2.4039133973685844, "learning_rate": 9.961321378291709e-08, "loss": 0.3993, "step": 2677 }, { "ETA": 0.82, "epoch": 0.861231709278019, "fp16_scale": 1.0, "global_step": 2678, "grad_norm": 1.9960946402114894, "learning_rate": 9.916034387478277e-08, "loss": 0.4358, "step": 2678 }, { "ETA": 0.82, "epoch": 0.8615533043897733, "fp16_scale": 1.0, "global_step": 2679, "grad_norm": 1.8065461152088664, "learning_rate": 9.870845204257394e-08, "loss": 0.4388, "step": 2679 }, { "ETA": 0.82, "epoch": 0.8618748995015276, "fp16_scale": 1.0, "global_step": 2680, "grad_norm": 2.033474167397842, "learning_rate": 9.825753877692689e-08, "loss": 0.4701, "step": 2680 }, { "ETA": 0.82, "epoch": 0.8621964946132818, "fp16_scale": 1.0, "global_step": 2681, "grad_norm": 1.9026420844908551, "learning_rate": 9.780760456741554e-08, "loss": 0.4008, "step": 2681 }, { "ETA": 0.82, "epoch": 0.8625180897250362, "fp16_scale": 1.0, "global_step": 2682, "grad_norm": 2.306147518834476, "learning_rate": 9.735864990255016e-08, "loss": 0.4138, "step": 2682 }, { "ETA": 0.82, "epoch": 0.8628396848367905, "fp16_scale": 1.0, "global_step": 2683, "grad_norm": 1.7908639279612824, "learning_rate": 9.691067526977803e-08, "loss": 0.4172, "step": 2683 }, { "ETA": 0.81, "epoch": 0.8631612799485447, "fp16_scale": 1.0, "global_step": 2684, "grad_norm": 1.9653635756092456, "learning_rate": 9.646368115548231e-08, "loss": 0.4443, "step": 2684 }, { "ETA": 0.81, "epoch": 0.8634828750602991, "fp16_scale": 1.0, "global_step": 2685, "grad_norm": 1.8720711898442557, "learning_rate": 9.601766804498157e-08, "loss": 0.4053, "step": 2685 }, { "ETA": 0.81, "epoch": 0.8638044701720534, "fp16_scale": 1.0, "global_step": 2686, "grad_norm": 1.9331466478282657, "learning_rate": 9.557263642252944e-08, "loss": 0.3921, "step": 2686 }, { "ETA": 0.81, "epoch": 0.8641260652838076, "fp16_scale": 1.0, "global_step": 2687, "grad_norm": 1.9640580317615708, "learning_rate": 9.512858677131341e-08, "loss": 0.4809, "step": 2687 }, { "ETA": 0.81, "epoch": 0.864447660395562, "fp16_scale": 1.0, "global_step": 2688, "grad_norm": 1.8557612803074546, "learning_rate": 9.468551957345505e-08, "loss": 0.4485, "step": 2688 }, { "ETA": 0.8, "epoch": 0.8647692555073163, "fp16_scale": 1.0, "global_step": 2689, "grad_norm": 2.320872209504059, "learning_rate": 9.424343531000967e-08, "loss": 0.4452, "step": 2689 }, { "ETA": 0.8, "epoch": 0.8650908506190705, "fp16_scale": 1.0, "global_step": 2690, "grad_norm": 2.0004259465596457, "learning_rate": 9.380233446096441e-08, "loss": 0.4013, "step": 2690 }, { "ETA": 0.8, "epoch": 0.8654124457308249, "fp16_scale": 1.0, "global_step": 2691, "grad_norm": 2.426862627617974, "learning_rate": 9.336221750523965e-08, "loss": 0.37, "step": 2691 }, { "ETA": 0.8, "epoch": 0.8657340408425792, "fp16_scale": 1.0, "global_step": 2692, "grad_norm": 2.0556781304120655, "learning_rate": 9.292308492068713e-08, "loss": 0.3694, "step": 2692 }, { "ETA": 0.8, "epoch": 0.8660556359543335, "fp16_scale": 1.0, "global_step": 2693, "grad_norm": 2.319035031009702, "learning_rate": 9.24849371840899e-08, "loss": 0.4246, "step": 2693 }, { "ETA": 0.79, "epoch": 0.8663772310660878, "fp16_scale": 1.0, "global_step": 2694, "grad_norm": 1.7868633454422285, "learning_rate": 9.204777477116155e-08, "loss": 0.4009, "step": 2694 }, { "ETA": 0.79, "epoch": 0.8666988261778421, "fp16_scale": 1.0, "global_step": 2695, "grad_norm": 2.0346070033620487, "learning_rate": 9.161159815654573e-08, "loss": 0.4271, "step": 2695 }, { "ETA": 0.79, "epoch": 0.8670204212895964, "fp16_scale": 1.0, "global_step": 2696, "grad_norm": 1.776428647891114, "learning_rate": 9.11764078138162e-08, "loss": 0.359, "step": 2696 }, { "ETA": 0.79, "epoch": 0.8673420164013507, "fp16_scale": 1.0, "global_step": 2697, "grad_norm": 1.974715017816616, "learning_rate": 9.074220421547563e-08, "loss": 0.4793, "step": 2697 }, { "ETA": 0.79, "epoch": 0.867663611513105, "fp16_scale": 1.0, "global_step": 2698, "grad_norm": 2.0667641671046297, "learning_rate": 9.030898783295571e-08, "loss": 0.4323, "step": 2698 }, { "ETA": 0.78, "epoch": 0.8679852066248593, "fp16_scale": 1.0, "global_step": 2699, "grad_norm": 2.3097626181233077, "learning_rate": 8.987675913661574e-08, "loss": 0.3421, "step": 2699 }, { "ETA": 0.78, "epoch": 0.8683068017366136, "fp16_scale": 1.0, "global_step": 2700, "grad_norm": 1.9625198456674338, "learning_rate": 8.944551859574268e-08, "loss": 0.4458, "step": 2700 }, { "ETA": 0.78, "epoch": 0.8686283968483679, "fp16_scale": 1.0, "global_step": 2701, "grad_norm": 2.01924383728897, "learning_rate": 8.901526667855097e-08, "loss": 0.3976, "step": 2701 }, { "ETA": 0.78, "epoch": 0.8689499919601222, "fp16_scale": 1.0, "global_step": 2702, "grad_norm": 1.708273426556552, "learning_rate": 8.858600385218151e-08, "loss": 0.3952, "step": 2702 }, { "ETA": 0.78, "epoch": 0.8692715870718765, "fp16_scale": 1.0, "global_step": 2703, "grad_norm": 2.04331318356076, "learning_rate": 8.815773058270148e-08, "loss": 0.3858, "step": 2703 }, { "ETA": 0.78, "epoch": 0.8695931821836308, "fp16_scale": 1.0, "global_step": 2704, "grad_norm": 1.9244381399724477, "learning_rate": 8.773044733510337e-08, "loss": 0.4292, "step": 2704 }, { "ETA": 0.77, "epoch": 0.8699147772953851, "fp16_scale": 1.0, "global_step": 2705, "grad_norm": 1.9755569703236773, "learning_rate": 8.730415457330464e-08, "loss": 0.4936, "step": 2705 }, { "ETA": 0.77, "epoch": 0.8702363724071394, "fp16_scale": 1.0, "global_step": 2706, "grad_norm": 1.9797344852629037, "learning_rate": 8.687885276014784e-08, "loss": 0.5014, "step": 2706 }, { "ETA": 0.77, "epoch": 0.8705579675188937, "fp16_scale": 1.0, "global_step": 2707, "grad_norm": 2.0958508443954567, "learning_rate": 8.645454235739902e-08, "loss": 0.4487, "step": 2707 }, { "ETA": 0.77, "epoch": 0.870879562630648, "fp16_scale": 1.0, "global_step": 2708, "grad_norm": 1.8913047874929232, "learning_rate": 8.603122382574868e-08, "loss": 0.4689, "step": 2708 }, { "ETA": 0.77, "epoch": 0.8712011577424024, "fp16_scale": 1.0, "global_step": 2709, "grad_norm": 1.9407118115129578, "learning_rate": 8.560889762480949e-08, "loss": 0.4018, "step": 2709 }, { "ETA": 0.76, "epoch": 0.8715227528541566, "fp16_scale": 1.0, "global_step": 2710, "grad_norm": 1.92871027174814, "learning_rate": 8.518756421311734e-08, "loss": 0.4175, "step": 2710 }, { "ETA": 0.76, "epoch": 0.8718443479659109, "fp16_scale": 1.0, "global_step": 2711, "grad_norm": 1.8382717354407931, "learning_rate": 8.476722404812975e-08, "loss": 0.3714, "step": 2711 }, { "ETA": 0.76, "epoch": 0.8721659430776653, "fp16_scale": 1.0, "global_step": 2712, "grad_norm": 2.097631682170976, "learning_rate": 8.434787758622597e-08, "loss": 0.4672, "step": 2712 }, { "ETA": 0.76, "epoch": 0.8724875381894195, "fp16_scale": 1.0, "global_step": 2713, "grad_norm": 2.2517755829643176, "learning_rate": 8.392952528270659e-08, "loss": 0.4415, "step": 2713 }, { "ETA": 0.76, "epoch": 0.8728091333011738, "fp16_scale": 1.0, "global_step": 2714, "grad_norm": 1.961201675919311, "learning_rate": 8.351216759179247e-08, "loss": 0.4135, "step": 2714 }, { "ETA": 0.75, "epoch": 0.8731307284129282, "fp16_scale": 1.0, "global_step": 2715, "grad_norm": 2.041682500027486, "learning_rate": 8.309580496662527e-08, "loss": 0.3429, "step": 2715 }, { "ETA": 0.75, "epoch": 0.8734523235246824, "fp16_scale": 1.0, "global_step": 2716, "grad_norm": 1.993755032287539, "learning_rate": 8.268043785926526e-08, "loss": 0.485, "step": 2716 }, { "ETA": 0.75, "epoch": 0.8737739186364367, "fp16_scale": 1.0, "global_step": 2717, "grad_norm": 2.050738264187792, "learning_rate": 8.226606672069226e-08, "loss": 0.3753, "step": 2717 }, { "ETA": 0.75, "epoch": 0.874095513748191, "fp16_scale": 1.0, "global_step": 2718, "grad_norm": 2.1505532918193864, "learning_rate": 8.185269200080502e-08, "loss": 0.3564, "step": 2718 }, { "ETA": 0.75, "epoch": 0.8744171088599453, "fp16_scale": 1.0, "global_step": 2719, "grad_norm": 2.214996888291509, "learning_rate": 8.144031414842012e-08, "loss": 0.4368, "step": 2719 }, { "ETA": 0.74, "epoch": 0.8747387039716996, "fp16_scale": 1.0, "global_step": 2720, "grad_norm": 1.96114501865112, "learning_rate": 8.102893361127216e-08, "loss": 0.4957, "step": 2720 }, { "ETA": 0.74, "epoch": 0.875060299083454, "fp16_scale": 1.0, "global_step": 2721, "grad_norm": 1.7876486697715273, "learning_rate": 8.061855083601232e-08, "loss": 0.3758, "step": 2721 }, { "ETA": 0.74, "epoch": 0.8753818941952083, "fp16_scale": 1.0, "global_step": 2722, "grad_norm": 2.0905142702443866, "learning_rate": 8.020916626820918e-08, "loss": 0.4295, "step": 2722 }, { "ETA": 0.74, "epoch": 0.8757034893069625, "fp16_scale": 1.0, "global_step": 2723, "grad_norm": 2.113068852745191, "learning_rate": 7.98007803523466e-08, "loss": 0.3791, "step": 2723 }, { "ETA": 0.74, "epoch": 0.8760250844187168, "fp16_scale": 1.0, "global_step": 2724, "grad_norm": 1.9401652158306832, "learning_rate": 7.939339353182517e-08, "loss": 0.3808, "step": 2724 }, { "ETA": 0.73, "epoch": 0.8763466795304712, "fp16_scale": 1.0, "global_step": 2725, "grad_norm": 2.034665160963121, "learning_rate": 7.898700624896027e-08, "loss": 0.3465, "step": 2725 }, { "ETA": 0.73, "epoch": 0.8766682746422254, "fp16_scale": 1.0, "global_step": 2726, "grad_norm": 1.8507702561681787, "learning_rate": 7.858161894498172e-08, "loss": 0.4083, "step": 2726 }, { "ETA": 0.73, "epoch": 0.8769898697539797, "fp16_scale": 1.0, "global_step": 2727, "grad_norm": 2.1941389801374247, "learning_rate": 7.817723206003446e-08, "loss": 0.3664, "step": 2727 }, { "ETA": 0.73, "epoch": 0.8773114648657341, "fp16_scale": 1.0, "global_step": 2728, "grad_norm": 1.8409107945974215, "learning_rate": 7.777384603317638e-08, "loss": 0.5171, "step": 2728 }, { "ETA": 0.73, "epoch": 0.8776330599774883, "fp16_scale": 1.0, "global_step": 2729, "grad_norm": 1.932922086837846, "learning_rate": 7.737146130237871e-08, "loss": 0.4135, "step": 2729 }, { "ETA": 0.73, "epoch": 0.8779546550892426, "fp16_scale": 1.0, "global_step": 2730, "grad_norm": 2.286567034059594, "learning_rate": 7.697007830452673e-08, "loss": 0.4116, "step": 2730 }, { "ETA": 0.72, "epoch": 0.878276250200997, "fp16_scale": 1.0, "global_step": 2731, "grad_norm": 2.0746783678866607, "learning_rate": 7.656969747541663e-08, "loss": 0.3823, "step": 2731 }, { "ETA": 0.72, "epoch": 0.8785978453127512, "fp16_scale": 1.0, "global_step": 2732, "grad_norm": 1.8478197569383832, "learning_rate": 7.617031924975736e-08, "loss": 0.4208, "step": 2732 }, { "ETA": 0.72, "epoch": 0.8789194404245055, "fp16_scale": 1.0, "global_step": 2733, "grad_norm": 2.002994476522452, "learning_rate": 7.577194406116915e-08, "loss": 0.342, "step": 2733 }, { "ETA": 0.72, "epoch": 0.8792410355362599, "fp16_scale": 1.0, "global_step": 2734, "grad_norm": 1.859219332222486, "learning_rate": 7.53745723421827e-08, "loss": 0.4539, "step": 2734 }, { "ETA": 0.72, "epoch": 0.8795626306480141, "fp16_scale": 1.0, "global_step": 2735, "grad_norm": 1.6261147657752693, "learning_rate": 7.497820452423998e-08, "loss": 0.3786, "step": 2735 }, { "ETA": 0.71, "epoch": 0.8798842257597684, "fp16_scale": 1.0, "global_step": 2736, "grad_norm": 2.5133372634963385, "learning_rate": 7.458284103769252e-08, "loss": 0.4415, "step": 2736 }, { "ETA": 0.71, "epoch": 0.8802058208715228, "fp16_scale": 1.0, "global_step": 2737, "grad_norm": 1.985319547549968, "learning_rate": 7.418848231180175e-08, "loss": 0.439, "step": 2737 }, { "ETA": 0.71, "epoch": 0.8805274159832771, "fp16_scale": 1.0, "global_step": 2738, "grad_norm": 1.940018471801165, "learning_rate": 7.379512877473748e-08, "loss": 0.3708, "step": 2738 }, { "ETA": 0.71, "epoch": 0.8808490110950313, "fp16_scale": 1.0, "global_step": 2739, "grad_norm": 1.9191589322087108, "learning_rate": 7.340278085357909e-08, "loss": 0.3948, "step": 2739 }, { "ETA": 0.71, "epoch": 0.8811706062067857, "fp16_scale": 1.0, "global_step": 2740, "grad_norm": 1.9885083066858489, "learning_rate": 7.301143897431339e-08, "loss": 0.4462, "step": 2740 }, { "ETA": 0.7, "epoch": 0.88149220131854, "fp16_scale": 1.0, "global_step": 2741, "grad_norm": 2.0307542959584604, "learning_rate": 7.262110356183516e-08, "loss": 0.4325, "step": 2741 }, { "ETA": 0.7, "epoch": 0.8818137964302942, "fp16_scale": 1.0, "global_step": 2742, "grad_norm": 1.837682696054374, "learning_rate": 7.223177503994671e-08, "loss": 0.3813, "step": 2742 }, { "ETA": 0.7, "epoch": 0.8821353915420486, "fp16_scale": 1.0, "global_step": 2743, "grad_norm": 2.0378733960131723, "learning_rate": 7.184345383135648e-08, "loss": 0.4802, "step": 2743 }, { "ETA": 0.7, "epoch": 0.8824569866538029, "fp16_scale": 1.0, "global_step": 2744, "grad_norm": 1.7920779749086864, "learning_rate": 7.145614035767988e-08, "loss": 0.4465, "step": 2744 }, { "ETA": 0.7, "epoch": 0.8827785817655571, "fp16_scale": 1.0, "global_step": 2745, "grad_norm": 1.9659612418072967, "learning_rate": 7.106983503943764e-08, "loss": 0.4623, "step": 2745 }, { "ETA": 0.69, "epoch": 0.8831001768773115, "fp16_scale": 1.0, "global_step": 2746, "grad_norm": 2.2622281114188953, "learning_rate": 7.068453829605625e-08, "loss": 0.4464, "step": 2746 }, { "ETA": 0.69, "epoch": 0.8834217719890658, "fp16_scale": 1.0, "global_step": 2747, "grad_norm": 2.0562916836782987, "learning_rate": 7.030025054586731e-08, "loss": 0.4161, "step": 2747 }, { "ETA": 0.69, "epoch": 0.88374336710082, "fp16_scale": 1.0, "global_step": 2748, "grad_norm": 1.9244367543472374, "learning_rate": 6.991697220610638e-08, "loss": 0.4549, "step": 2748 }, { "ETA": 0.69, "epoch": 0.8840649622125744, "fp16_scale": 1.0, "global_step": 2749, "grad_norm": 2.0026094793621714, "learning_rate": 6.953470369291348e-08, "loss": 0.4025, "step": 2749 }, { "ETA": 0.69, "epoch": 0.8843865573243287, "fp16_scale": 1.0, "global_step": 2750, "grad_norm": 2.0039552999787085, "learning_rate": 6.915344542133195e-08, "loss": 0.4755, "step": 2750 }, { "ETA": 0.69, "epoch": 0.8847081524360829, "fp16_scale": 1.0, "global_step": 2751, "grad_norm": 2.168757933772285, "learning_rate": 6.877319780530844e-08, "loss": 0.3671, "step": 2751 }, { "ETA": 0.68, "epoch": 0.8850297475478373, "fp16_scale": 1.0, "global_step": 2752, "grad_norm": 2.0765117403074647, "learning_rate": 6.839396125769258e-08, "loss": 0.36, "step": 2752 }, { "ETA": 0.68, "epoch": 0.8853513426595916, "fp16_scale": 1.0, "global_step": 2753, "grad_norm": 2.0748444198922704, "learning_rate": 6.801573619023549e-08, "loss": 0.364, "step": 2753 }, { "ETA": 0.68, "epoch": 0.8856729377713459, "fp16_scale": 1.0, "global_step": 2754, "grad_norm": 2.0661161487993818, "learning_rate": 6.763852301359086e-08, "loss": 0.4385, "step": 2754 }, { "ETA": 0.68, "epoch": 0.8859945328831001, "fp16_scale": 1.0, "global_step": 2755, "grad_norm": 1.889073661932241, "learning_rate": 6.72623221373132e-08, "loss": 0.4008, "step": 2755 }, { "ETA": 0.68, "epoch": 0.8863161279948545, "fp16_scale": 1.0, "global_step": 2756, "grad_norm": 2.1217875274225824, "learning_rate": 6.688713396985835e-08, "loss": 0.4568, "step": 2756 }, { "ETA": 0.67, "epoch": 0.8866377231066088, "fp16_scale": 1.0, "global_step": 2757, "grad_norm": 1.9332812064355187, "learning_rate": 6.651295891858211e-08, "loss": 0.3302, "step": 2757 }, { "ETA": 0.67, "epoch": 0.886959318218363, "fp16_scale": 1.0, "global_step": 2758, "grad_norm": 2.952396656380801, "learning_rate": 6.613979738974073e-08, "loss": 0.4236, "step": 2758 }, { "ETA": 0.67, "epoch": 0.8872809133301174, "fp16_scale": 1.0, "global_step": 2759, "grad_norm": 1.9415780066928712, "learning_rate": 6.576764978849003e-08, "loss": 0.4657, "step": 2759 }, { "ETA": 0.67, "epoch": 0.8876025084418717, "fp16_scale": 1.0, "global_step": 2760, "grad_norm": 2.33496607348724, "learning_rate": 6.539651651888455e-08, "loss": 0.4484, "step": 2760 }, { "ETA": 0.67, "epoch": 0.8879241035536259, "fp16_scale": 1.0, "global_step": 2761, "grad_norm": 1.9715363195482916, "learning_rate": 6.50263979838781e-08, "loss": 0.4244, "step": 2761 }, { "ETA": 0.66, "epoch": 0.8882456986653803, "fp16_scale": 1.0, "global_step": 2762, "grad_norm": 2.1190461218668277, "learning_rate": 6.46572945853222e-08, "loss": 0.3667, "step": 2762 }, { "ETA": 0.66, "epoch": 0.8885672937771346, "fp16_scale": 1.0, "global_step": 2763, "grad_norm": 1.9899442435298367, "learning_rate": 6.428920672396665e-08, "loss": 0.3984, "step": 2763 }, { "ETA": 0.66, "epoch": 0.8888888888888888, "fp16_scale": 1.0, "global_step": 2764, "grad_norm": 2.1433779788227505, "learning_rate": 6.392213479945851e-08, "loss": 0.4387, "step": 2764 }, { "ETA": 0.66, "epoch": 0.8892104840006432, "fp16_scale": 1.0, "global_step": 2765, "grad_norm": 1.958060690192412, "learning_rate": 6.355607921034145e-08, "loss": 0.3963, "step": 2765 }, { "ETA": 0.66, "epoch": 0.8895320791123975, "fp16_scale": 1.0, "global_step": 2766, "grad_norm": 2.359572994511273, "learning_rate": 6.319104035405642e-08, "loss": 0.4135, "step": 2766 }, { "ETA": 0.65, "epoch": 0.8898536742241518, "fp16_scale": 1.0, "global_step": 2767, "grad_norm": 1.9099708137215627, "learning_rate": 6.282701862693962e-08, "loss": 0.5077, "step": 2767 }, { "ETA": 0.65, "epoch": 0.8901752693359061, "fp16_scale": 1.0, "global_step": 2768, "grad_norm": 2.014617572852332, "learning_rate": 6.246401442422345e-08, "loss": 0.3983, "step": 2768 }, { "ETA": 0.65, "epoch": 0.8904968644476604, "fp16_scale": 1.0, "global_step": 2769, "grad_norm": 2.3235058046937676, "learning_rate": 6.21020281400354e-08, "loss": 0.4439, "step": 2769 }, { "ETA": 0.65, "epoch": 0.8908184595594147, "fp16_scale": 1.0, "global_step": 2770, "grad_norm": 1.7802979753919927, "learning_rate": 6.174106016739777e-08, "loss": 0.4451, "step": 2770 }, { "ETA": 0.65, "epoch": 0.891140054671169, "fp16_scale": 1.0, "global_step": 2771, "grad_norm": 1.9911169744032278, "learning_rate": 6.138111089822729e-08, "loss": 0.4853, "step": 2771 }, { "ETA": 0.64, "epoch": 0.8914616497829233, "fp16_scale": 1.0, "global_step": 2772, "grad_norm": 1.9804843218216104, "learning_rate": 6.102218072333443e-08, "loss": 0.4372, "step": 2772 }, { "ETA": 0.64, "epoch": 0.8917832448946776, "fp16_scale": 1.0, "global_step": 2773, "grad_norm": 1.8782788491617932, "learning_rate": 6.066427003242358e-08, "loss": 0.47, "step": 2773 }, { "ETA": 0.64, "epoch": 0.8921048400064319, "fp16_scale": 1.0, "global_step": 2774, "grad_norm": 2.0014379014585333, "learning_rate": 6.030737921409168e-08, "loss": 0.4152, "step": 2774 }, { "ETA": 0.64, "epoch": 0.8924264351181862, "fp16_scale": 1.0, "global_step": 2775, "grad_norm": 1.7100955869657102, "learning_rate": 5.995150865582887e-08, "loss": 0.4357, "step": 2775 }, { "ETA": 0.64, "epoch": 0.8927480302299405, "fp16_scale": 1.0, "global_step": 2776, "grad_norm": 2.0662115662141978, "learning_rate": 5.9596658744017645e-08, "loss": 0.4112, "step": 2776 }, { "ETA": 0.64, "epoch": 0.8930696253416948, "fp16_scale": 1.0, "global_step": 2777, "grad_norm": 1.9429734424099012, "learning_rate": 5.924282986393159e-08, "loss": 0.3925, "step": 2777 }, { "ETA": 0.63, "epoch": 0.8933912204534491, "fp16_scale": 1.0, "global_step": 2778, "grad_norm": 1.973763094763008, "learning_rate": 5.889002239973651e-08, "loss": 0.3393, "step": 2778 }, { "ETA": 0.63, "epoch": 0.8937128155652034, "fp16_scale": 1.0, "global_step": 2779, "grad_norm": 1.885547944082085, "learning_rate": 5.8538236734488765e-08, "loss": 0.4881, "step": 2779 }, { "ETA": 0.63, "epoch": 0.8940344106769577, "fp16_scale": 1.0, "global_step": 2780, "grad_norm": 1.7525422661166352, "learning_rate": 5.81874732501354e-08, "loss": 0.3359, "step": 2780 }, { "ETA": 0.63, "epoch": 0.894356005788712, "fp16_scale": 1.0, "global_step": 2781, "grad_norm": 1.9032992627126815, "learning_rate": 5.783773232751399e-08, "loss": 0.4142, "step": 2781 }, { "ETA": 0.63, "epoch": 0.8946776009004663, "fp16_scale": 1.0, "global_step": 2782, "grad_norm": 2.1776207196827646, "learning_rate": 5.7489014346351114e-08, "loss": 0.4638, "step": 2782 }, { "ETA": 0.62, "epoch": 0.8949991960122207, "fp16_scale": 1.0, "global_step": 2783, "grad_norm": 1.8962053377438484, "learning_rate": 5.71413196852637e-08, "loss": 0.3545, "step": 2783 }, { "ETA": 0.62, "epoch": 0.8953207911239749, "fp16_scale": 1.0, "global_step": 2784, "grad_norm": 2.0714169257639443, "learning_rate": 5.6794648721756656e-08, "loss": 0.5256, "step": 2784 }, { "ETA": 0.62, "epoch": 0.8956423862357292, "fp16_scale": 1.0, "global_step": 2785, "grad_norm": 2.0333492450531825, "learning_rate": 5.6449001832223895e-08, "loss": 0.4191, "step": 2785 }, { "ETA": 0.62, "epoch": 0.8959639813474836, "fp16_scale": 1.0, "global_step": 2786, "grad_norm": 1.8074962470777867, "learning_rate": 5.610437939194779e-08, "loss": 0.4456, "step": 2786 }, { "ETA": 0.62, "epoch": 0.8962855764592378, "fp16_scale": 1.0, "global_step": 2787, "grad_norm": 1.9607028035361536, "learning_rate": 5.5760781775097574e-08, "loss": 0.4336, "step": 2787 }, { "ETA": 0.61, "epoch": 0.8966071715709921, "fp16_scale": 1.0, "global_step": 2788, "grad_norm": 2.0004431256527466, "learning_rate": 5.5418209354730626e-08, "loss": 0.5431, "step": 2788 }, { "ETA": 0.61, "epoch": 0.8969287666827465, "fp16_scale": 1.0, "global_step": 2789, "grad_norm": 2.0203590109563883, "learning_rate": 5.507666250279053e-08, "loss": 0.4809, "step": 2789 }, { "ETA": 0.61, "epoch": 0.8972503617945007, "fp16_scale": 1.0, "global_step": 2790, "grad_norm": 1.92302946795985, "learning_rate": 5.4736141590107866e-08, "loss": 0.3758, "step": 2790 }, { "ETA": 0.61, "epoch": 0.897571956906255, "fp16_scale": 1.0, "global_step": 2791, "grad_norm": 1.9859721627098932, "learning_rate": 5.4396646986399454e-08, "loss": 0.3476, "step": 2791 }, { "ETA": 0.61, "epoch": 0.8978935520180094, "fp16_scale": 1.0, "global_step": 2792, "grad_norm": 1.9804620448459982, "learning_rate": 5.4058179060267e-08, "loss": 0.3944, "step": 2792 }, { "ETA": 0.6, "epoch": 0.8982151471297636, "fp16_scale": 1.0, "global_step": 2793, "grad_norm": 2.0532529562429547, "learning_rate": 5.372073817919842e-08, "loss": 0.5245, "step": 2793 }, { "ETA": 0.6, "epoch": 0.8985367422415179, "fp16_scale": 1.0, "global_step": 2794, "grad_norm": 1.9961537606507216, "learning_rate": 5.3384324709565887e-08, "loss": 0.4047, "step": 2794 }, { "ETA": 0.6, "epoch": 0.8988583373532723, "fp16_scale": 1.0, "global_step": 2795, "grad_norm": 2.4223557942125753, "learning_rate": 5.3048939016626547e-08, "loss": 0.3971, "step": 2795 }, { "ETA": 0.6, "epoch": 0.8991799324650265, "fp16_scale": 1.0, "global_step": 2796, "grad_norm": 1.934772789614562, "learning_rate": 5.2714581464521016e-08, "loss": 0.4174, "step": 2796 }, { "ETA": 0.6, "epoch": 0.8995015275767808, "fp16_scale": 1.0, "global_step": 2797, "grad_norm": 2.0986507181843743, "learning_rate": 5.238125241627456e-08, "loss": 0.4943, "step": 2797 }, { "ETA": 0.6, "epoch": 0.8998231226885351, "fp16_scale": 1.0, "global_step": 2798, "grad_norm": 1.9990106115715338, "learning_rate": 5.204895223379491e-08, "loss": 0.3806, "step": 2798 }, { "ETA": 0.59, "epoch": 0.9001447178002895, "fp16_scale": 1.0, "global_step": 2799, "grad_norm": 2.1018154315633675, "learning_rate": 5.171768127787302e-08, "loss": 0.4229, "step": 2799 }, { "ETA": 0.59, "epoch": 0.9004663129120437, "fp16_scale": 1.0, "global_step": 2800, "grad_norm": 1.9412235090122163, "learning_rate": 5.1387439908182505e-08, "loss": 0.362, "step": 2800 }, { "ETA": 0.59, "epoch": 0.900787908023798, "fp16_scale": 1.0, "global_step": 2801, "grad_norm": 1.9229842640914792, "learning_rate": 5.105822848327879e-08, "loss": 0.4345, "step": 2801 }, { "ETA": 0.59, "epoch": 0.9011095031355524, "fp16_scale": 1.0, "global_step": 2802, "grad_norm": 2.072485326088834, "learning_rate": 5.073004736059949e-08, "loss": 0.4725, "step": 2802 }, { "ETA": 0.59, "epoch": 0.9014310982473066, "fp16_scale": 1.0, "global_step": 2803, "grad_norm": 1.9110256803258092, "learning_rate": 5.040289689646338e-08, "loss": 0.4398, "step": 2803 }, { "ETA": 0.58, "epoch": 0.9017526933590609, "fp16_scale": 1.0, "global_step": 2804, "grad_norm": 1.9401269117169266, "learning_rate": 5.007677744606986e-08, "loss": 0.3845, "step": 2804 }, { "ETA": 0.58, "epoch": 0.9020742884708153, "fp16_scale": 1.0, "global_step": 2805, "grad_norm": 1.882339884357719, "learning_rate": 4.9751689363499714e-08, "loss": 0.3854, "step": 2805 }, { "ETA": 0.58, "epoch": 0.9023958835825695, "fp16_scale": 1.0, "global_step": 2806, "grad_norm": 1.9100152569468716, "learning_rate": 4.942763300171293e-08, "loss": 0.3934, "step": 2806 }, { "ETA": 0.58, "epoch": 0.9027174786943238, "fp16_scale": 1.0, "global_step": 2807, "grad_norm": 2.038750726725319, "learning_rate": 4.9104608712550065e-08, "loss": 0.4337, "step": 2807 }, { "ETA": 0.58, "epoch": 0.9030390738060782, "fp16_scale": 1.0, "global_step": 2808, "grad_norm": 2.017505647201532, "learning_rate": 4.878261684673102e-08, "loss": 0.3796, "step": 2808 }, { "ETA": 0.57, "epoch": 0.9033606689178324, "fp16_scale": 1.0, "global_step": 2809, "grad_norm": 2.053613315651322, "learning_rate": 4.846165775385458e-08, "loss": 0.3799, "step": 2809 }, { "ETA": 0.57, "epoch": 0.9036822640295867, "fp16_scale": 1.0, "global_step": 2810, "grad_norm": 2.2447420755578946, "learning_rate": 4.814173178239833e-08, "loss": 0.4516, "step": 2810 }, { "ETA": 0.57, "epoch": 0.9040038591413411, "fp16_scale": 1.0, "global_step": 2811, "grad_norm": 2.0541863410234313, "learning_rate": 4.782283927971775e-08, "loss": 0.4281, "step": 2811 }, { "ETA": 0.57, "epoch": 0.9043254542530954, "fp16_scale": 1.0, "global_step": 2812, "grad_norm": 2.0116360550988697, "learning_rate": 4.7504980592046776e-08, "loss": 0.3497, "step": 2812 }, { "ETA": 0.57, "epoch": 0.9046470493648496, "fp16_scale": 1.0, "global_step": 2813, "grad_norm": 2.0850594510307157, "learning_rate": 4.7188156064496664e-08, "loss": 0.3847, "step": 2813 }, { "ETA": 0.56, "epoch": 0.904968644476604, "fp16_scale": 1.0, "global_step": 2814, "grad_norm": 2.2255842436670634, "learning_rate": 4.687236604105615e-08, "loss": 0.4049, "step": 2814 }, { "ETA": 0.56, "epoch": 0.9052902395883583, "fp16_scale": 1.0, "global_step": 2815, "grad_norm": 1.8471804725282406, "learning_rate": 4.655761086459009e-08, "loss": 0.475, "step": 2815 }, { "ETA": 0.56, "epoch": 0.9056118347001125, "fp16_scale": 1.0, "global_step": 2816, "grad_norm": 2.1853785377843438, "learning_rate": 4.624389087684033e-08, "loss": 0.4647, "step": 2816 }, { "ETA": 0.56, "epoch": 0.9059334298118669, "fp16_scale": 1.0, "global_step": 2817, "grad_norm": 1.8683504344226407, "learning_rate": 4.593120641842474e-08, "loss": 0.4378, "step": 2817 }, { "ETA": 0.56, "epoch": 0.9062550249236212, "fp16_scale": 1.0, "global_step": 2818, "grad_norm": 2.1039763344759344, "learning_rate": 4.5619557828836306e-08, "loss": 0.4204, "step": 2818 }, { "ETA": 0.56, "epoch": 0.9065766200353754, "fp16_scale": 1.0, "global_step": 2819, "grad_norm": 1.952491669989993, "learning_rate": 4.530894544644426e-08, "loss": 0.4794, "step": 2819 }, { "ETA": 0.55, "epoch": 0.9068982151471298, "fp16_scale": 1.0, "global_step": 2820, "grad_norm": 2.1989145210470182, "learning_rate": 4.499936960849226e-08, "loss": 0.3845, "step": 2820 }, { "ETA": 0.55, "epoch": 0.9072198102588841, "fp16_scale": 1.0, "global_step": 2821, "grad_norm": 2.3718244597709783, "learning_rate": 4.4690830651098244e-08, "loss": 0.4097, "step": 2821 }, { "ETA": 0.55, "epoch": 0.9075414053706383, "fp16_scale": 1.0, "global_step": 2822, "grad_norm": 1.9915383448367294, "learning_rate": 4.4383328909255e-08, "loss": 0.4298, "step": 2822 }, { "ETA": 0.55, "epoch": 0.9078630004823927, "fp16_scale": 1.0, "global_step": 2823, "grad_norm": 1.9772001686657124, "learning_rate": 4.40768647168287e-08, "loss": 0.4083, "step": 2823 }, { "ETA": 0.55, "epoch": 0.908184595594147, "fp16_scale": 1.0, "global_step": 2824, "grad_norm": 2.0575240055420942, "learning_rate": 4.377143840655917e-08, "loss": 0.375, "step": 2824 }, { "ETA": 0.54, "epoch": 0.9085061907059012, "fp16_scale": 1.0, "global_step": 2825, "grad_norm": 1.848169752420248, "learning_rate": 4.34670503100596e-08, "loss": 0.4511, "step": 2825 }, { "ETA": 0.54, "epoch": 0.9088277858176556, "fp16_scale": 1.0, "global_step": 2826, "grad_norm": 2.0723450252659372, "learning_rate": 4.316370075781573e-08, "loss": 0.3619, "step": 2826 }, { "ETA": 0.54, "epoch": 0.9091493809294099, "fp16_scale": 1.0, "global_step": 2827, "grad_norm": 2.0247285800808315, "learning_rate": 4.286139007918566e-08, "loss": 0.4377, "step": 2827 }, { "ETA": 0.54, "epoch": 0.9094709760411642, "fp16_scale": 1.0, "global_step": 2828, "grad_norm": 1.939722941084154, "learning_rate": 4.2560118602399386e-08, "loss": 0.4625, "step": 2828 }, { "ETA": 0.54, "epoch": 0.9097925711529185, "fp16_scale": 1.0, "global_step": 2829, "grad_norm": 2.1283893092626305, "learning_rate": 4.225988665455904e-08, "loss": 0.4464, "step": 2829 }, { "ETA": 0.53, "epoch": 0.9101141662646728, "fp16_scale": 1.0, "global_step": 2830, "grad_norm": 1.9020144535129746, "learning_rate": 4.1960694561637864e-08, "loss": 0.3999, "step": 2830 }, { "ETA": 0.53, "epoch": 0.9104357613764271, "fp16_scale": 1.0, "global_step": 2831, "grad_norm": 1.7322995221783446, "learning_rate": 4.166254264848024e-08, "loss": 0.4352, "step": 2831 }, { "ETA": 0.53, "epoch": 0.9107573564881813, "fp16_scale": 1.0, "global_step": 2832, "grad_norm": 1.9560527597958781, "learning_rate": 4.136543123880088e-08, "loss": 0.4196, "step": 2832 }, { "ETA": 0.53, "epoch": 0.9110789515999357, "fp16_scale": 1.0, "global_step": 2833, "grad_norm": 2.1021864770481207, "learning_rate": 4.1069360655184846e-08, "loss": 0.435, "step": 2833 }, { "ETA": 0.53, "epoch": 0.91140054671169, "fp16_scale": 1.0, "global_step": 2834, "grad_norm": 1.7474690163098885, "learning_rate": 4.077433121908747e-08, "loss": 0.3684, "step": 2834 }, { "ETA": 0.52, "epoch": 0.9117221418234442, "fp16_scale": 1.0, "global_step": 2835, "grad_norm": 1.992238578274917, "learning_rate": 4.048034325083327e-08, "loss": 0.4108, "step": 2835 }, { "ETA": 0.52, "epoch": 0.9120437369351986, "fp16_scale": 1.0, "global_step": 2836, "grad_norm": 2.0955800830552187, "learning_rate": 4.018739706961649e-08, "loss": 0.4941, "step": 2836 }, { "ETA": 0.52, "epoch": 0.9123653320469529, "fp16_scale": 1.0, "global_step": 2837, "grad_norm": 2.0180449576883186, "learning_rate": 3.9895492993499594e-08, "loss": 0.3799, "step": 2837 }, { "ETA": 0.52, "epoch": 0.9126869271587071, "fp16_scale": 1.0, "global_step": 2838, "grad_norm": 1.7623029939806376, "learning_rate": 3.9604631339414276e-08, "loss": 0.3238, "step": 2838 }, { "ETA": 0.52, "epoch": 0.9130085222704615, "fp16_scale": 1.0, "global_step": 2839, "grad_norm": 1.939639310069537, "learning_rate": 3.9314812423159924e-08, "loss": 0.4883, "step": 2839 }, { "ETA": 0.51, "epoch": 0.9133301173822158, "fp16_scale": 1.0, "global_step": 2840, "grad_norm": 1.7967992231208605, "learning_rate": 3.902603655940384e-08, "loss": 0.3382, "step": 2840 }, { "ETA": 0.51, "epoch": 0.91365171249397, "fp16_scale": 1.0, "global_step": 2841, "grad_norm": 1.9918374371163514, "learning_rate": 3.87383040616811e-08, "loss": 0.4233, "step": 2841 }, { "ETA": 0.51, "epoch": 0.9139733076057244, "fp16_scale": 1.0, "global_step": 2842, "grad_norm": 2.4450115166099176, "learning_rate": 3.845161524239393e-08, "loss": 0.4185, "step": 2842 }, { "ETA": 0.51, "epoch": 0.9142949027174787, "fp16_scale": 1.0, "global_step": 2843, "grad_norm": 2.137554399172394, "learning_rate": 3.816597041281144e-08, "loss": 0.4271, "step": 2843 }, { "ETA": 0.51, "epoch": 0.914616497829233, "fp16_scale": 1.0, "global_step": 2844, "grad_norm": 2.149222564381253, "learning_rate": 3.788136988306878e-08, "loss": 0.3694, "step": 2844 }, { "ETA": 0.51, "epoch": 0.9149380929409873, "fp16_scale": 1.0, "global_step": 2845, "grad_norm": 1.8491213389245522, "learning_rate": 3.7597813962167654e-08, "loss": 0.4423, "step": 2845 }, { "ETA": 0.5, "epoch": 0.9152596880527416, "fp16_scale": 1.0, "global_step": 2846, "grad_norm": 2.031649759451373, "learning_rate": 3.731530295797558e-08, "loss": 0.4607, "step": 2846 }, { "ETA": 0.5, "epoch": 0.9155812831644959, "fp16_scale": 1.0, "global_step": 2847, "grad_norm": 1.9071908232824537, "learning_rate": 3.7033837177225415e-08, "loss": 0.3631, "step": 2847 }, { "ETA": 0.5, "epoch": 0.9159028782762502, "fp16_scale": 1.0, "global_step": 2848, "grad_norm": 1.8521844096559676, "learning_rate": 3.675341692551559e-08, "loss": 0.4, "step": 2848 }, { "ETA": 0.5, "epoch": 0.9162244733880045, "fp16_scale": 1.0, "global_step": 2849, "grad_norm": 1.8572444407739268, "learning_rate": 3.647404250730879e-08, "loss": 0.42, "step": 2849 }, { "ETA": 0.5, "epoch": 0.9165460684997588, "fp16_scale": 1.0, "global_step": 2850, "grad_norm": 1.9641467246546185, "learning_rate": 3.619571422593248e-08, "loss": 0.4721, "step": 2850 }, { "ETA": 0.49, "epoch": 0.9168676636115131, "fp16_scale": 1.0, "global_step": 2851, "grad_norm": 2.026162989565498, "learning_rate": 3.591843238357828e-08, "loss": 0.327, "step": 2851 }, { "ETA": 0.49, "epoch": 0.9171892587232674, "fp16_scale": 1.0, "global_step": 2852, "grad_norm": 1.9214575809984122, "learning_rate": 3.5642197281301576e-08, "loss": 0.4278, "step": 2852 }, { "ETA": 0.49, "epoch": 0.9175108538350217, "fp16_scale": 1.0, "global_step": 2853, "grad_norm": 1.9719888765763525, "learning_rate": 3.536700921902169e-08, "loss": 0.4269, "step": 2853 }, { "ETA": 0.49, "epoch": 0.917832448946776, "fp16_scale": 1.0, "global_step": 2854, "grad_norm": 2.030921485554089, "learning_rate": 3.509286849552029e-08, "loss": 0.42, "step": 2854 }, { "ETA": 0.49, "epoch": 0.9181540440585303, "fp16_scale": 1.0, "global_step": 2855, "grad_norm": 2.16948803003424, "learning_rate": 3.481977540844283e-08, "loss": 0.3734, "step": 2855 }, { "ETA": 0.48, "epoch": 0.9184756391702846, "fp16_scale": 1.0, "global_step": 2856, "grad_norm": 2.1816436495070497, "learning_rate": 3.454773025429658e-08, "loss": 0.3648, "step": 2856 }, { "ETA": 0.48, "epoch": 0.918797234282039, "fp16_scale": 1.0, "global_step": 2857, "grad_norm": 2.0823465631539473, "learning_rate": 3.427673332845138e-08, "loss": 0.4859, "step": 2857 }, { "ETA": 0.48, "epoch": 0.9191188293937932, "fp16_scale": 1.0, "global_step": 2858, "grad_norm": 1.9732601534126495, "learning_rate": 3.4006784925139085e-08, "loss": 0.4306, "step": 2858 }, { "ETA": 0.48, "epoch": 0.9194404245055475, "fp16_scale": 1.0, "global_step": 2859, "grad_norm": 2.171560970560878, "learning_rate": 3.373788533745281e-08, "loss": 0.3704, "step": 2859 }, { "ETA": 0.48, "epoch": 0.9197620196173019, "fp16_scale": 1.0, "global_step": 2860, "grad_norm": 1.952877951419747, "learning_rate": 3.347003485734712e-08, "loss": 0.3489, "step": 2860 }, { "ETA": 0.47, "epoch": 0.9200836147290561, "fp16_scale": 1.0, "global_step": 2861, "grad_norm": 1.7911008016079333, "learning_rate": 3.3203233775637494e-08, "loss": 0.3741, "step": 2861 }, { "ETA": 0.47, "epoch": 0.9204052098408104, "fp16_scale": 1.0, "global_step": 2862, "grad_norm": 1.9835673260363862, "learning_rate": 3.2937482381999895e-08, "loss": 0.483, "step": 2862 }, { "ETA": 0.47, "epoch": 0.9207268049525648, "fp16_scale": 1.0, "global_step": 2863, "grad_norm": 2.1148827379064876, "learning_rate": 3.267278096497084e-08, "loss": 0.3681, "step": 2863 }, { "ETA": 0.47, "epoch": 0.921048400064319, "fp16_scale": 1.0, "global_step": 2864, "grad_norm": 2.0236888634451855, "learning_rate": 3.2409129811946765e-08, "loss": 0.4312, "step": 2864 }, { "ETA": 0.47, "epoch": 0.9213699951760733, "fp16_scale": 1.0, "global_step": 2865, "grad_norm": 2.039835627064776, "learning_rate": 3.214652920918393e-08, "loss": 0.3642, "step": 2865 }, { "ETA": 0.46, "epoch": 0.9216915902878277, "fp16_scale": 1.0, "global_step": 2866, "grad_norm": 2.122661662967067, "learning_rate": 3.1884979441797576e-08, "loss": 0.462, "step": 2866 }, { "ETA": 0.46, "epoch": 0.9220131853995819, "fp16_scale": 1.0, "global_step": 2867, "grad_norm": 2.060438498644924, "learning_rate": 3.162448079376212e-08, "loss": 0.3579, "step": 2867 }, { "ETA": 0.46, "epoch": 0.9223347805113362, "fp16_scale": 1.0, "global_step": 2868, "grad_norm": 1.94447250213451, "learning_rate": 3.136503354791109e-08, "loss": 0.4399, "step": 2868 }, { "ETA": 0.46, "epoch": 0.9226563756230906, "fp16_scale": 1.0, "global_step": 2869, "grad_norm": 2.1724875728398736, "learning_rate": 3.1106637985936155e-08, "loss": 0.384, "step": 2869 }, { "ETA": 0.46, "epoch": 0.9229779707348448, "fp16_scale": 1.0, "global_step": 2870, "grad_norm": 1.9524800347584632, "learning_rate": 3.084929438838746e-08, "loss": 0.455, "step": 2870 }, { "ETA": 0.45, "epoch": 0.9232995658465991, "fp16_scale": 1.0, "global_step": 2871, "grad_norm": 2.0985641238180297, "learning_rate": 3.059300303467238e-08, "loss": 0.381, "step": 2871 }, { "ETA": 0.45, "epoch": 0.9236211609583534, "fp16_scale": 1.0, "global_step": 2872, "grad_norm": 2.0603571877937856, "learning_rate": 3.033776420305656e-08, "loss": 0.4047, "step": 2872 }, { "ETA": 0.45, "epoch": 0.9239427560701078, "fp16_scale": 1.0, "global_step": 2873, "grad_norm": 2.0350019436918396, "learning_rate": 3.0083578170662095e-08, "loss": 0.378, "step": 2873 }, { "ETA": 0.45, "epoch": 0.924264351181862, "fp16_scale": 1.0, "global_step": 2874, "grad_norm": 1.920722594905657, "learning_rate": 2.983044521346878e-08, "loss": 0.3673, "step": 2874 }, { "ETA": 0.45, "epoch": 0.9245859462936163, "fp16_scale": 1.0, "global_step": 2875, "grad_norm": 1.9377182625281728, "learning_rate": 2.957836560631266e-08, "loss": 0.4096, "step": 2875 }, { "ETA": 0.45, "epoch": 0.9249075414053707, "fp16_scale": 1.0, "global_step": 2876, "grad_norm": 2.1329634427360338, "learning_rate": 2.9327339622886027e-08, "loss": 0.4077, "step": 2876 }, { "ETA": 0.44, "epoch": 0.9252291365171249, "fp16_scale": 1.0, "global_step": 2877, "grad_norm": 2.0851122865652125, "learning_rate": 2.907736753573764e-08, "loss": 0.3713, "step": 2877 }, { "ETA": 0.44, "epoch": 0.9255507316288792, "fp16_scale": 1.0, "global_step": 2878, "grad_norm": 2.1182511867806917, "learning_rate": 2.88284496162714e-08, "loss": 0.4121, "step": 2878 }, { "ETA": 0.44, "epoch": 0.9258723267406336, "fp16_scale": 1.0, "global_step": 2879, "grad_norm": 1.686680202974622, "learning_rate": 2.8580586134746898e-08, "loss": 0.4095, "step": 2879 }, { "ETA": 0.44, "epoch": 0.9261939218523878, "fp16_scale": 1.0, "global_step": 2880, "grad_norm": 2.003043949217861, "learning_rate": 2.833377736027931e-08, "loss": 0.4378, "step": 2880 }, { "ETA": 0.44, "epoch": 0.9265155169641421, "fp16_scale": 1.0, "global_step": 2881, "grad_norm": 2.0132547667998177, "learning_rate": 2.8088023560838058e-08, "loss": 0.4162, "step": 2881 }, { "ETA": 0.43, "epoch": 0.9268371120758965, "fp16_scale": 1.0, "global_step": 2882, "grad_norm": 1.9477678640121898, "learning_rate": 2.7843325003247707e-08, "loss": 0.3662, "step": 2882 }, { "ETA": 0.43, "epoch": 0.9271587071876507, "fp16_scale": 1.0, "global_step": 2883, "grad_norm": 2.03755480900658, "learning_rate": 2.7599681953186405e-08, "loss": 0.4026, "step": 2883 }, { "ETA": 0.43, "epoch": 0.927480302299405, "fp16_scale": 1.0, "global_step": 2884, "grad_norm": 1.896138339130577, "learning_rate": 2.7357094675186987e-08, "loss": 0.4692, "step": 2884 }, { "ETA": 0.43, "epoch": 0.9278018974111594, "fp16_scale": 1.0, "global_step": 2885, "grad_norm": 1.8473039605003492, "learning_rate": 2.7115563432635547e-08, "loss": 0.3465, "step": 2885 }, { "ETA": 0.43, "epoch": 0.9281234925229137, "fp16_scale": 1.0, "global_step": 2886, "grad_norm": 1.7983228307103274, "learning_rate": 2.6875088487771757e-08, "loss": 0.3294, "step": 2886 }, { "ETA": 0.42, "epoch": 0.9284450876346679, "fp16_scale": 1.0, "global_step": 2887, "grad_norm": 2.0419374524211076, "learning_rate": 2.6635670101688547e-08, "loss": 0.4591, "step": 2887 }, { "ETA": 0.42, "epoch": 0.9287666827464223, "fp16_scale": 1.0, "global_step": 2888, "grad_norm": 2.0402655206198377, "learning_rate": 2.639730853433142e-08, "loss": 0.4047, "step": 2888 }, { "ETA": 0.42, "epoch": 0.9290882778581766, "fp16_scale": 1.0, "global_step": 2889, "grad_norm": 3.28262931467128, "learning_rate": 2.6160004044498808e-08, "loss": 0.3441, "step": 2889 }, { "ETA": 0.42, "epoch": 0.9294098729699308, "fp16_scale": 1.0, "global_step": 2890, "grad_norm": 1.938396177219089, "learning_rate": 2.5923756889841052e-08, "loss": 0.3975, "step": 2890 }, { "ETA": 0.42, "epoch": 0.9297314680816852, "fp16_scale": 1.0, "global_step": 2891, "grad_norm": 1.966063074085021, "learning_rate": 2.5688567326860644e-08, "loss": 0.4875, "step": 2891 }, { "ETA": 0.41, "epoch": 0.9300530631934395, "fp16_scale": 1.0, "global_step": 2892, "grad_norm": 1.8360764536400855, "learning_rate": 2.5454435610912095e-08, "loss": 0.417, "step": 2892 }, { "ETA": 0.41, "epoch": 0.9303746583051937, "fp16_scale": 1.0, "global_step": 2893, "grad_norm": 1.75421471852118, "learning_rate": 2.5221361996200952e-08, "loss": 0.4182, "step": 2893 }, { "ETA": 0.41, "epoch": 0.9306962534169481, "fp16_scale": 1.0, "global_step": 2894, "grad_norm": 1.9087059051516082, "learning_rate": 2.4989346735784124e-08, "loss": 0.4365, "step": 2894 }, { "ETA": 0.41, "epoch": 0.9310178485287024, "fp16_scale": 1.0, "global_step": 2895, "grad_norm": 2.0869932193712404, "learning_rate": 2.4758390081569436e-08, "loss": 0.4637, "step": 2895 }, { "ETA": 0.41, "epoch": 0.9313394436404566, "fp16_scale": 1.0, "global_step": 2896, "grad_norm": 2.237552466479958, "learning_rate": 2.4528492284315305e-08, "loss": 0.4443, "step": 2896 }, { "ETA": 0.4, "epoch": 0.931661038752211, "fp16_scale": 1.0, "global_step": 2897, "grad_norm": 2.198283922950274, "learning_rate": 2.429965359363073e-08, "loss": 0.3911, "step": 2897 }, { "ETA": 0.4, "epoch": 0.9319826338639653, "fp16_scale": 1.0, "global_step": 2898, "grad_norm": 2.0013742597536557, "learning_rate": 2.407187425797419e-08, "loss": 0.4223, "step": 2898 }, { "ETA": 0.4, "epoch": 0.9323042289757195, "fp16_scale": 1.0, "global_step": 2899, "grad_norm": 1.895460601105781, "learning_rate": 2.384515452465474e-08, "loss": 0.475, "step": 2899 }, { "ETA": 0.4, "epoch": 0.9326258240874739, "fp16_scale": 1.0, "global_step": 2900, "grad_norm": 2.3961486345211536, "learning_rate": 2.3619494639830374e-08, "loss": 0.4462, "step": 2900 }, { "ETA": 0.4, "epoch": 0.9329474191992282, "fp16_scale": 1.0, "global_step": 2901, "grad_norm": 2.092962793312189, "learning_rate": 2.3394894848508874e-08, "loss": 0.4551, "step": 2901 }, { "ETA": 0.4, "epoch": 0.9332690143109825, "fp16_scale": 1.0, "global_step": 2902, "grad_norm": 1.8940398717493065, "learning_rate": 2.317135539454662e-08, "loss": 0.4258, "step": 2902 }, { "ETA": 0.39, "epoch": 0.9335906094227368, "fp16_scale": 1.0, "global_step": 2903, "grad_norm": 1.8358028292874564, "learning_rate": 2.2948876520648917e-08, "loss": 0.4338, "step": 2903 }, { "ETA": 0.39, "epoch": 0.9339122045344911, "fp16_scale": 1.0, "global_step": 2904, "grad_norm": 2.0039504383224016, "learning_rate": 2.2727458468369654e-08, "loss": 0.4437, "step": 2904 }, { "ETA": 0.39, "epoch": 0.9342337996462454, "fp16_scale": 1.0, "global_step": 2905, "grad_norm": 1.9121237637779611, "learning_rate": 2.2507101478110745e-08, "loss": 0.4524, "step": 2905 }, { "ETA": 0.39, "epoch": 0.9345553947579996, "fp16_scale": 1.0, "global_step": 2906, "grad_norm": 2.0617501161978526, "learning_rate": 2.228780578912226e-08, "loss": 0.4526, "step": 2906 }, { "ETA": 0.39, "epoch": 0.934876989869754, "fp16_scale": 1.0, "global_step": 2907, "grad_norm": 1.9074988731134586, "learning_rate": 2.206957163950174e-08, "loss": 0.4001, "step": 2907 }, { "ETA": 0.38, "epoch": 0.9351985849815083, "fp16_scale": 1.0, "global_step": 2908, "grad_norm": 2.522027191574107, "learning_rate": 2.185239926619431e-08, "loss": 0.4575, "step": 2908 }, { "ETA": 0.38, "epoch": 0.9355201800932625, "fp16_scale": 1.0, "global_step": 2909, "grad_norm": 1.9950749602428057, "learning_rate": 2.1636288904992585e-08, "loss": 0.4594, "step": 2909 }, { "ETA": 0.38, "epoch": 0.9358417752050169, "fp16_scale": 1.0, "global_step": 2910, "grad_norm": 2.405769795260131, "learning_rate": 2.1421240790535424e-08, "loss": 0.4531, "step": 2910 }, { "ETA": 0.38, "epoch": 0.9361633703167712, "fp16_scale": 1.0, "global_step": 2911, "grad_norm": 2.0040134486983425, "learning_rate": 2.1207255156309056e-08, "loss": 0.4508, "step": 2911 }, { "ETA": 0.38, "epoch": 0.9364849654285254, "fp16_scale": 1.0, "global_step": 2912, "grad_norm": 1.9088454468379767, "learning_rate": 2.099433223464564e-08, "loss": 0.4288, "step": 2912 }, { "ETA": 0.37, "epoch": 0.9368065605402798, "fp16_scale": 1.0, "global_step": 2913, "grad_norm": 2.059182017213907, "learning_rate": 2.0782472256723803e-08, "loss": 0.4029, "step": 2913 }, { "ETA": 0.37, "epoch": 0.9371281556520341, "fp16_scale": 1.0, "global_step": 2914, "grad_norm": 2.069901122293247, "learning_rate": 2.0571675452567993e-08, "loss": 0.3054, "step": 2914 }, { "ETA": 0.37, "epoch": 0.9374497507637883, "fp16_scale": 1.0, "global_step": 2915, "grad_norm": 1.9778287081296801, "learning_rate": 2.0361942051048242e-08, "loss": 0.4049, "step": 2915 }, { "ETA": 0.37, "epoch": 0.9377713458755427, "fp16_scale": 1.0, "global_step": 2916, "grad_norm": 1.9012498076195208, "learning_rate": 2.0153272279880173e-08, "loss": 0.4042, "step": 2916 }, { "ETA": 0.37, "epoch": 0.938092940987297, "fp16_scale": 1.0, "global_step": 2917, "grad_norm": 2.0072285388411335, "learning_rate": 1.9945666365624447e-08, "loss": 0.3625, "step": 2917 }, { "ETA": 0.36, "epoch": 0.9384145360990513, "fp16_scale": 1.0, "global_step": 2918, "grad_norm": 1.9604109361099546, "learning_rate": 1.973912453368676e-08, "loss": 0.3789, "step": 2918 }, { "ETA": 0.36, "epoch": 0.9387361312108056, "fp16_scale": 1.0, "global_step": 2919, "grad_norm": 1.8108215220097155, "learning_rate": 1.95336470083175e-08, "loss": 0.3918, "step": 2919 }, { "ETA": 0.36, "epoch": 0.9390577263225599, "fp16_scale": 1.0, "global_step": 2920, "grad_norm": 2.2514629786650167, "learning_rate": 1.9329234012611327e-08, "loss": 0.3537, "step": 2920 }, { "ETA": 0.36, "epoch": 0.9393793214343142, "fp16_scale": 1.0, "global_step": 2921, "grad_norm": 1.986437118979266, "learning_rate": 1.9125885768507267e-08, "loss": 0.5235, "step": 2921 }, { "ETA": 0.36, "epoch": 0.9397009165460685, "fp16_scale": 1.0, "global_step": 2922, "grad_norm": 1.9941076820573982, "learning_rate": 1.892360249678826e-08, "loss": 0.4799, "step": 2922 }, { "ETA": 0.36, "epoch": 0.9400225116578228, "fp16_scale": 1.0, "global_step": 2923, "grad_norm": 2.317297842742614, "learning_rate": 1.872238441708085e-08, "loss": 0.3844, "step": 2923 }, { "ETA": 0.35, "epoch": 0.9403441067695771, "fp16_scale": 1.0, "global_step": 2924, "grad_norm": 1.9462108083364182, "learning_rate": 1.8522231747855388e-08, "loss": 0.4104, "step": 2924 }, { "ETA": 0.35, "epoch": 0.9406657018813314, "fp16_scale": 1.0, "global_step": 2925, "grad_norm": 1.8474139095167101, "learning_rate": 1.8323144706425155e-08, "loss": 0.4414, "step": 2925 }, { "ETA": 0.35, "epoch": 0.9409872969930857, "fp16_scale": 1.0, "global_step": 2926, "grad_norm": 1.924080350838665, "learning_rate": 1.812512350894646e-08, "loss": 0.3743, "step": 2926 }, { "ETA": 0.35, "epoch": 0.94130889210484, "fp16_scale": 1.0, "global_step": 2927, "grad_norm": 2.0265808388741546, "learning_rate": 1.792816837041844e-08, "loss": 0.4951, "step": 2927 }, { "ETA": 0.35, "epoch": 0.9416304872165943, "fp16_scale": 1.0, "global_step": 2928, "grad_norm": 2.1163588652864913, "learning_rate": 1.7732279504683034e-08, "loss": 0.4028, "step": 2928 }, { "ETA": 0.34, "epoch": 0.9419520823283486, "fp16_scale": 1.0, "global_step": 2929, "grad_norm": 2.2452526256173764, "learning_rate": 1.7537457124423893e-08, "loss": 0.4152, "step": 2929 }, { "ETA": 0.34, "epoch": 0.9422736774401029, "fp16_scale": 1.0, "global_step": 2930, "grad_norm": 1.951578725577269, "learning_rate": 1.7343701441167258e-08, "loss": 0.5007, "step": 2930 }, { "ETA": 0.34, "epoch": 0.9425952725518573, "fp16_scale": 1.0, "global_step": 2931, "grad_norm": 2.0595012579859735, "learning_rate": 1.7151012665281183e-08, "loss": 0.346, "step": 2931 }, { "ETA": 0.34, "epoch": 0.9429168676636115, "fp16_scale": 1.0, "global_step": 2932, "grad_norm": 1.8322769366624494, "learning_rate": 1.6959391005975098e-08, "loss": 0.4124, "step": 2932 }, { "ETA": 0.34, "epoch": 0.9432384627753658, "fp16_scale": 1.0, "global_step": 2933, "grad_norm": 1.9282113927559967, "learning_rate": 1.6768836671299912e-08, "loss": 0.4012, "step": 2933 }, { "ETA": 0.33, "epoch": 0.9435600578871202, "fp16_scale": 1.0, "global_step": 2934, "grad_norm": 2.1884584348146543, "learning_rate": 1.6579349868147686e-08, "loss": 0.5475, "step": 2934 }, { "ETA": 0.33, "epoch": 0.9438816529988744, "fp16_scale": 1.0, "global_step": 2935, "grad_norm": 2.073075463862458, "learning_rate": 1.6390930802251624e-08, "loss": 0.3919, "step": 2935 }, { "ETA": 0.33, "epoch": 0.9442032481106287, "fp16_scale": 1.0, "global_step": 2936, "grad_norm": 1.9712325183264225, "learning_rate": 1.620357967818531e-08, "loss": 0.43, "step": 2936 }, { "ETA": 0.33, "epoch": 0.9445248432223831, "fp16_scale": 1.0, "global_step": 2937, "grad_norm": 2.197163721181814, "learning_rate": 1.6017296699363138e-08, "loss": 0.5202, "step": 2937 }, { "ETA": 0.33, "epoch": 0.9448464383341373, "fp16_scale": 1.0, "global_step": 2938, "grad_norm": 1.919809845775633, "learning_rate": 1.583208206803954e-08, "loss": 0.3665, "step": 2938 }, { "ETA": 0.32, "epoch": 0.9451680334458916, "fp16_scale": 1.0, "global_step": 2939, "grad_norm": 1.8314609230798584, "learning_rate": 1.5647935985309003e-08, "loss": 0.4703, "step": 2939 }, { "ETA": 0.32, "epoch": 0.945489628557646, "fp16_scale": 1.0, "global_step": 2940, "grad_norm": 2.0837053070772114, "learning_rate": 1.5464858651106138e-08, "loss": 0.4618, "step": 2940 }, { "ETA": 0.32, "epoch": 0.9458112236694002, "fp16_scale": 1.0, "global_step": 2941, "grad_norm": 1.7579175386781978, "learning_rate": 1.528285026420484e-08, "loss": 0.3877, "step": 2941 }, { "ETA": 0.32, "epoch": 0.9461328187811545, "fp16_scale": 1.0, "global_step": 2942, "grad_norm": 1.8483540718218372, "learning_rate": 1.5101911022218693e-08, "loss": 0.4942, "step": 2942 }, { "ETA": 0.32, "epoch": 0.9464544138929089, "fp16_scale": 1.0, "global_step": 2943, "grad_norm": 1.9770167749618421, "learning_rate": 1.4922041121600337e-08, "loss": 0.3881, "step": 2943 }, { "ETA": 0.32, "epoch": 0.9467760090046631, "fp16_scale": 1.0, "global_step": 2944, "grad_norm": 2.0364922257199267, "learning_rate": 1.4743240757641107e-08, "loss": 0.462, "step": 2944 }, { "ETA": 0.31, "epoch": 0.9470976041164174, "fp16_scale": 1.0, "global_step": 2945, "grad_norm": 2.45622442544849, "learning_rate": 1.4565510124471492e-08, "loss": 0.3526, "step": 2945 }, { "ETA": 0.31, "epoch": 0.9474191992281717, "fp16_scale": 1.0, "global_step": 2946, "grad_norm": 2.015600297041838, "learning_rate": 1.4388849415060466e-08, "loss": 0.3525, "step": 2946 }, { "ETA": 0.31, "epoch": 0.9477407943399261, "fp16_scale": 1.0, "global_step": 2947, "grad_norm": 2.1377403208434806, "learning_rate": 1.4213258821215379e-08, "loss": 0.4307, "step": 2947 }, { "ETA": 0.31, "epoch": 0.9480623894516803, "fp16_scale": 1.0, "global_step": 2948, "grad_norm": 1.7641667527380287, "learning_rate": 1.4038738533581617e-08, "loss": 0.3583, "step": 2948 }, { "ETA": 0.31, "epoch": 0.9483839845634346, "fp16_scale": 1.0, "global_step": 2949, "grad_norm": 1.7784574412022152, "learning_rate": 1.3865288741642168e-08, "loss": 0.4862, "step": 2949 }, { "ETA": 0.3, "epoch": 0.948705579675189, "fp16_scale": 1.0, "global_step": 2950, "grad_norm": 1.9576693109144088, "learning_rate": 1.3692909633718497e-08, "loss": 0.4525, "step": 2950 }, { "ETA": 0.3, "epoch": 0.9490271747869432, "fp16_scale": 1.0, "global_step": 2951, "grad_norm": 2.133508948989494, "learning_rate": 1.3521601396968896e-08, "loss": 0.4069, "step": 2951 }, { "ETA": 0.3, "epoch": 0.9493487698986975, "fp16_scale": 1.0, "global_step": 2952, "grad_norm": 1.8553115893701249, "learning_rate": 1.3351364217389249e-08, "loss": 0.4627, "step": 2952 }, { "ETA": 0.3, "epoch": 0.9496703650104519, "fp16_scale": 1.0, "global_step": 2953, "grad_norm": 1.9719888974956736, "learning_rate": 1.3182198279812816e-08, "loss": 0.4568, "step": 2953 }, { "ETA": 0.3, "epoch": 0.9499919601222061, "fp16_scale": 1.0, "global_step": 2954, "grad_norm": 2.0719174871476014, "learning_rate": 1.3014103767909235e-08, "loss": 0.3627, "step": 2954 }, { "ETA": 0.29, "epoch": 0.9503135552339604, "fp16_scale": 1.0, "global_step": 2955, "grad_norm": 2.087689015845033, "learning_rate": 1.2847080864185177e-08, "loss": 0.4018, "step": 2955 }, { "ETA": 0.29, "epoch": 0.9506351503457148, "fp16_scale": 1.0, "global_step": 2956, "grad_norm": 1.9434984481486384, "learning_rate": 1.2681129749983809e-08, "loss": 0.4133, "step": 2956 }, { "ETA": 0.29, "epoch": 0.950956745457469, "fp16_scale": 1.0, "global_step": 2957, "grad_norm": 1.9024416054546027, "learning_rate": 1.2516250605484558e-08, "loss": 0.4437, "step": 2957 }, { "ETA": 0.29, "epoch": 0.9512783405692233, "fp16_scale": 1.0, "global_step": 2958, "grad_norm": 2.020528747188203, "learning_rate": 1.2352443609703e-08, "loss": 0.394, "step": 2958 }, { "ETA": 0.29, "epoch": 0.9515999356809777, "fp16_scale": 1.0, "global_step": 2959, "grad_norm": 1.8568009487685488, "learning_rate": 1.218970894049065e-08, "loss": 0.4347, "step": 2959 }, { "ETA": 0.28, "epoch": 0.9519215307927319, "fp16_scale": 1.0, "global_step": 2960, "grad_norm": 1.7086439917684098, "learning_rate": 1.2028046774534616e-08, "loss": 0.3959, "step": 2960 }, { "ETA": 0.28, "epoch": 0.9522431259044862, "fp16_scale": 1.0, "global_step": 2961, "grad_norm": 1.9627415938801038, "learning_rate": 1.186745728735783e-08, "loss": 0.3955, "step": 2961 }, { "ETA": 0.28, "epoch": 0.9525647210162406, "fp16_scale": 1.0, "global_step": 2962, "grad_norm": 1.7178424258131575, "learning_rate": 1.170794065331837e-08, "loss": 0.384, "step": 2962 }, { "ETA": 0.28, "epoch": 0.9528863161279949, "fp16_scale": 1.0, "global_step": 2963, "grad_norm": 1.923561456451965, "learning_rate": 1.1549497045609368e-08, "loss": 0.4176, "step": 2963 }, { "ETA": 0.28, "epoch": 0.9532079112397491, "fp16_scale": 1.0, "global_step": 2964, "grad_norm": 1.8165583592615953, "learning_rate": 1.1392126636259324e-08, "loss": 0.3305, "step": 2964 }, { "ETA": 0.27, "epoch": 0.9535295063515035, "fp16_scale": 1.0, "global_step": 2965, "grad_norm": 1.8570178517632707, "learning_rate": 1.123582959613123e-08, "loss": 0.4354, "step": 2965 }, { "ETA": 0.27, "epoch": 0.9538511014632578, "fp16_scale": 1.0, "global_step": 2966, "grad_norm": 2.0259558536558853, "learning_rate": 1.1080606094922562e-08, "loss": 0.3532, "step": 2966 }, { "ETA": 0.27, "epoch": 0.954172696575012, "fp16_scale": 1.0, "global_step": 2967, "grad_norm": 1.9570562339180626, "learning_rate": 1.0926456301165621e-08, "loss": 0.4097, "step": 2967 }, { "ETA": 0.27, "epoch": 0.9544942916867664, "fp16_scale": 1.0, "global_step": 2968, "grad_norm": 1.9588007205081928, "learning_rate": 1.0773380382226415e-08, "loss": 0.4465, "step": 2968 }, { "ETA": 0.27, "epoch": 0.9548158867985207, "fp16_scale": 1.0, "global_step": 2969, "grad_norm": 1.8906510859736605, "learning_rate": 1.0621378504305666e-08, "loss": 0.455, "step": 2969 }, { "ETA": 0.27, "epoch": 0.9551374819102749, "fp16_scale": 1.0, "global_step": 2970, "grad_norm": 2.0645576486022135, "learning_rate": 1.047045083243725e-08, "loss": 0.3481, "step": 2970 }, { "ETA": 0.26, "epoch": 0.9554590770220293, "fp16_scale": 1.0, "global_step": 2971, "grad_norm": 2.045479632270176, "learning_rate": 1.0320597530489417e-08, "loss": 0.4637, "step": 2971 }, { "ETA": 0.26, "epoch": 0.9557806721337836, "fp16_scale": 1.0, "global_step": 2972, "grad_norm": 1.660848396106032, "learning_rate": 1.0171818761163353e-08, "loss": 0.4502, "step": 2972 }, { "ETA": 0.26, "epoch": 0.9561022672455378, "fp16_scale": 1.0, "global_step": 2973, "grad_norm": 2.439452414713532, "learning_rate": 1.0024114685993956e-08, "loss": 0.4045, "step": 2973 }, { "ETA": 0.26, "epoch": 0.9564238623572922, "fp16_scale": 1.0, "global_step": 2974, "grad_norm": 1.7980539379155829, "learning_rate": 9.877485465349056e-09, "loss": 0.4406, "step": 2974 }, { "ETA": 0.26, "epoch": 0.9567454574690465, "fp16_scale": 1.0, "global_step": 2975, "grad_norm": 1.8403094333533125, "learning_rate": 9.731931258429638e-09, "loss": 0.4131, "step": 2975 }, { "ETA": 0.25, "epoch": 0.9570670525808008, "fp16_scale": 1.0, "global_step": 2976, "grad_norm": 1.8125355911510845, "learning_rate": 9.587452223269622e-09, "loss": 0.3938, "step": 2976 }, { "ETA": 0.25, "epoch": 0.957388647692555, "fp16_scale": 1.0, "global_step": 2977, "grad_norm": 1.8495426433052407, "learning_rate": 9.444048516735193e-09, "loss": 0.3742, "step": 2977 }, { "ETA": 0.25, "epoch": 0.9577102428043094, "fp16_scale": 1.0, "global_step": 2978, "grad_norm": 2.179984387368447, "learning_rate": 9.30172029452514e-09, "loss": 0.4132, "step": 2978 }, { "ETA": 0.25, "epoch": 0.9580318379160637, "fp16_scale": 1.0, "global_step": 2979, "grad_norm": 1.9759395060978266, "learning_rate": 9.16046771117085e-09, "loss": 0.4631, "step": 2979 }, { "ETA": 0.25, "epoch": 0.958353433027818, "fp16_scale": 1.0, "global_step": 2980, "grad_norm": 1.8666175143157948, "learning_rate": 9.020290920035534e-09, "loss": 0.4602, "step": 2980 }, { "ETA": 0.24, "epoch": 0.9586750281395723, "fp16_scale": 1.0, "global_step": 2981, "grad_norm": 2.052054203830057, "learning_rate": 8.881190073314559e-09, "loss": 0.4006, "step": 2981 }, { "ETA": 0.24, "epoch": 0.9589966232513266, "fp16_scale": 1.0, "global_step": 2982, "grad_norm": 2.0317608519735, "learning_rate": 8.743165322035007e-09, "loss": 0.464, "step": 2982 }, { "ETA": 0.24, "epoch": 0.9593182183630808, "fp16_scale": 1.0, "global_step": 2983, "grad_norm": 1.774378858660257, "learning_rate": 8.606216816055333e-09, "loss": 0.4483, "step": 2983 }, { "ETA": 0.24, "epoch": 0.9596398134748352, "fp16_scale": 1.0, "global_step": 2984, "grad_norm": 2.0955463172220608, "learning_rate": 8.470344704066046e-09, "loss": 0.3876, "step": 2984 }, { "ETA": 0.24, "epoch": 0.9599614085865895, "fp16_scale": 1.0, "global_step": 2985, "grad_norm": 2.1051645851664857, "learning_rate": 8.335549133588582e-09, "loss": 0.398, "step": 2985 }, { "ETA": 0.23, "epoch": 0.9602830036983437, "fp16_scale": 1.0, "global_step": 2986, "grad_norm": 2.029552173358812, "learning_rate": 8.20183025097565e-09, "loss": 0.3471, "step": 2986 }, { "ETA": 0.23, "epoch": 0.9606045988100981, "fp16_scale": 1.0, "global_step": 2987, "grad_norm": 2.0325420544793973, "learning_rate": 8.069188201410892e-09, "loss": 0.3908, "step": 2987 }, { "ETA": 0.23, "epoch": 0.9609261939218524, "fp16_scale": 1.0, "global_step": 2988, "grad_norm": 2.2358667514528054, "learning_rate": 7.937623128908887e-09, "loss": 0.4238, "step": 2988 }, { "ETA": 0.23, "epoch": 0.9612477890336066, "fp16_scale": 1.0, "global_step": 2989, "grad_norm": 1.8586219935882644, "learning_rate": 7.807135176314706e-09, "loss": 0.3451, "step": 2989 }, { "ETA": 0.23, "epoch": 0.961569384145361, "fp16_scale": 1.0, "global_step": 2990, "grad_norm": 1.7855331290795615, "learning_rate": 7.677724485304237e-09, "loss": 0.3704, "step": 2990 }, { "ETA": 0.23, "epoch": 0.9618909792571153, "fp16_scale": 1.0, "global_step": 2991, "grad_norm": 2.1596260546169046, "learning_rate": 7.549391196383536e-09, "loss": 0.4331, "step": 2991 }, { "ETA": 0.22, "epoch": 0.9622125743688696, "fp16_scale": 1.0, "global_step": 2992, "grad_norm": 1.9673441050483644, "learning_rate": 7.422135448889033e-09, "loss": 0.4345, "step": 2992 }, { "ETA": 0.22, "epoch": 0.9625341694806239, "fp16_scale": 1.0, "global_step": 2993, "grad_norm": 2.2208780929202523, "learning_rate": 7.295957380986983e-09, "loss": 0.3715, "step": 2993 }, { "ETA": 0.22, "epoch": 0.9628557645923782, "fp16_scale": 1.0, "global_step": 2994, "grad_norm": 2.0173989523449323, "learning_rate": 7.170857129673913e-09, "loss": 0.4656, "step": 2994 }, { "ETA": 0.22, "epoch": 0.9631773597041325, "fp16_scale": 1.0, "global_step": 2995, "grad_norm": 1.8048564034676648, "learning_rate": 7.0468348307757275e-09, "loss": 0.4128, "step": 2995 }, { "ETA": 0.22, "epoch": 0.9634989548158868, "fp16_scale": 1.0, "global_step": 2996, "grad_norm": 1.813233816899862, "learning_rate": 6.923890618948158e-09, "loss": 0.3979, "step": 2996 }, { "ETA": 0.21, "epoch": 0.9638205499276411, "fp16_scale": 1.0, "global_step": 2997, "grad_norm": 2.1973299162727242, "learning_rate": 6.80202462767665e-09, "loss": 0.439, "step": 2997 }, { "ETA": 0.21, "epoch": 0.9641421450393954, "fp16_scale": 1.0, "global_step": 2998, "grad_norm": 1.9469707157712282, "learning_rate": 6.681236989275585e-09, "loss": 0.3835, "step": 2998 }, { "ETA": 0.21, "epoch": 0.9644637401511497, "fp16_scale": 1.0, "global_step": 2999, "grad_norm": 1.8263752228757655, "learning_rate": 6.561527834888725e-09, "loss": 0.4488, "step": 2999 }, { "ETA": 0.21, "epoch": 0.964785335262904, "fp16_scale": 1.0, "global_step": 3000, "grad_norm": 1.8783427695492874, "learning_rate": 6.442897294488881e-09, "loss": 0.4241, "step": 3000 } ], "logging_steps": 1, "max_steps": 3109, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 6197817830014976.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }