{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.06521030322791001, "eval_steps": 9, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0006521030322791001, "eval_loss": 1.4430840015411377, "eval_runtime": 35.4265, "eval_samples_per_second": 72.911, "eval_steps_per_second": 9.117, "step": 1 }, { "epoch": 0.0019563090968373, "grad_norm": 0.13245901465415955, "learning_rate": 1.5e-05, "loss": 1.3922, "step": 3 }, { "epoch": 0.0039126181936746, "grad_norm": 0.13980551064014435, "learning_rate": 3e-05, "loss": 1.35, "step": 6 }, { "epoch": 0.0058689272905119005, "grad_norm": 0.21757693588733673, "learning_rate": 4.5e-05, "loss": 1.4342, "step": 9 }, { "epoch": 0.0058689272905119005, "eval_loss": 1.4398877620697021, "eval_runtime": 35.4144, "eval_samples_per_second": 72.936, "eval_steps_per_second": 9.121, "step": 9 }, { "epoch": 0.0078252363873492, "grad_norm": 0.16242149472236633, "learning_rate": 4.993910125649561e-05, "loss": 1.3931, "step": 12 }, { "epoch": 0.0097815454841865, "grad_norm": 0.15830300748348236, "learning_rate": 4.962019382530521e-05, "loss": 1.367, "step": 15 }, { "epoch": 0.011737854581023801, "grad_norm": 0.16229018568992615, "learning_rate": 4.9031542398457974e-05, "loss": 1.4666, "step": 18 }, { "epoch": 0.011737854581023801, "eval_loss": 1.4245867729187012, "eval_runtime": 35.2817, "eval_samples_per_second": 73.211, "eval_steps_per_second": 9.155, "step": 18 }, { "epoch": 0.013694163677861103, "grad_norm": 0.14549140632152557, "learning_rate": 4.817959636416969e-05, "loss": 1.4435, "step": 21 }, { "epoch": 0.0156504727746984, "grad_norm": 0.12280628830194473, "learning_rate": 4.707368982147318e-05, "loss": 1.4289, "step": 24 }, { "epoch": 0.017606781871535703, "grad_norm": 0.1361985057592392, "learning_rate": 4.572593931387604e-05, "loss": 1.4174, "step": 27 }, { "epoch": 0.017606781871535703, "eval_loss": 1.4081462621688843, "eval_runtime": 35.2327, "eval_samples_per_second": 73.313, "eval_steps_per_second": 9.168, "step": 27 }, { "epoch": 0.019563090968373, "grad_norm": 0.1305856853723526, "learning_rate": 4.415111107797445e-05, "loss": 1.3956, "step": 30 }, { "epoch": 0.021519400065210303, "grad_norm": 0.13506704568862915, "learning_rate": 4.2366459261474933e-05, "loss": 1.4642, "step": 33 }, { "epoch": 0.023475709162047602, "grad_norm": 0.1571059674024582, "learning_rate": 4.039153688314145e-05, "loss": 1.4487, "step": 36 }, { "epoch": 0.023475709162047602, "eval_loss": 1.3942054510116577, "eval_runtime": 35.4024, "eval_samples_per_second": 72.961, "eval_steps_per_second": 9.124, "step": 36 }, { "epoch": 0.025432018258884904, "grad_norm": 0.12497388571500778, "learning_rate": 3.824798160583012e-05, "loss": 1.345, "step": 39 }, { "epoch": 0.027388327355722206, "grad_norm": 0.1353573054075241, "learning_rate": 3.5959278669726935e-05, "loss": 1.3613, "step": 42 }, { "epoch": 0.029344636452559504, "grad_norm": 0.11737760901451111, "learning_rate": 3.355050358314172e-05, "loss": 1.3878, "step": 45 }, { "epoch": 0.029344636452559504, "eval_loss": 1.383650302886963, "eval_runtime": 35.4483, "eval_samples_per_second": 72.867, "eval_steps_per_second": 9.112, "step": 45 }, { "epoch": 0.0313009455493968, "grad_norm": 0.11926010251045227, "learning_rate": 3.104804738999169e-05, "loss": 1.4205, "step": 48 }, { "epoch": 0.03325725464623411, "grad_norm": 0.1115192323923111, "learning_rate": 2.8479327524001636e-05, "loss": 1.3493, "step": 51 }, { "epoch": 0.035213563743071406, "grad_norm": 0.11480339616537094, "learning_rate": 2.587248741756253e-05, "loss": 1.3904, "step": 54 }, { "epoch": 0.035213563743071406, "eval_loss": 1.376178503036499, "eval_runtime": 35.4933, "eval_samples_per_second": 72.774, "eval_steps_per_second": 9.1, "step": 54 }, { "epoch": 0.037169872839908705, "grad_norm": 0.11822472512722015, "learning_rate": 2.3256088156396868e-05, "loss": 1.3313, "step": 57 }, { "epoch": 0.039126181936746, "grad_norm": 0.1461370587348938, "learning_rate": 2.0658795558326743e-05, "loss": 1.3558, "step": 60 }, { "epoch": 0.04108249103358331, "grad_norm": 0.13045533001422882, "learning_rate": 1.8109066104575023e-05, "loss": 1.2992, "step": 63 }, { "epoch": 0.04108249103358331, "eval_loss": 1.3712804317474365, "eval_runtime": 35.3891, "eval_samples_per_second": 72.989, "eval_steps_per_second": 9.127, "step": 63 }, { "epoch": 0.04303880013042061, "grad_norm": 0.13522818684577942, "learning_rate": 1.56348351646022e-05, "loss": 1.244, "step": 66 }, { "epoch": 0.044995109227257905, "grad_norm": 0.1401805877685547, "learning_rate": 1.3263210930352737e-05, "loss": 1.3158, "step": 69 }, { "epoch": 0.046951418324095204, "grad_norm": 0.16507139801979065, "learning_rate": 1.1020177413231334e-05, "loss": 1.3923, "step": 72 }, { "epoch": 0.046951418324095204, "eval_loss": 1.368349313735962, "eval_runtime": 35.4333, "eval_samples_per_second": 72.898, "eval_steps_per_second": 9.116, "step": 72 }, { "epoch": 0.04890772742093251, "grad_norm": 0.1617535650730133, "learning_rate": 8.930309757836517e-06, "loss": 1.4659, "step": 75 }, { "epoch": 0.05086403651776981, "grad_norm": 0.15123188495635986, "learning_rate": 7.016504991533726e-06, "loss": 1.4194, "step": 78 }, { "epoch": 0.052820345614607106, "grad_norm": 0.1255597174167633, "learning_rate": 5.299731159831953e-06, "loss": 1.3503, "step": 81 }, { "epoch": 0.052820345614607106, "eval_loss": 1.3665692806243896, "eval_runtime": 35.4134, "eval_samples_per_second": 72.938, "eval_steps_per_second": 9.121, "step": 81 }, { "epoch": 0.05477665471144441, "grad_norm": 0.1431252807378769, "learning_rate": 3.798797596089351e-06, "loss": 1.3153, "step": 84 }, { "epoch": 0.05673296380828171, "grad_norm": 0.1532163769006729, "learning_rate": 2.5301488425208296e-06, "loss": 1.3877, "step": 87 }, { "epoch": 0.05868927290511901, "grad_norm": 0.15569448471069336, "learning_rate": 1.5076844803522922e-06, "loss": 1.4249, "step": 90 }, { "epoch": 0.05868927290511901, "eval_loss": 1.3660001754760742, "eval_runtime": 35.493, "eval_samples_per_second": 72.775, "eval_steps_per_second": 9.1, "step": 90 }, { "epoch": 0.06064558200195631, "grad_norm": 0.1278517246246338, "learning_rate": 7.426068431000882e-07, "loss": 1.4409, "step": 93 }, { "epoch": 0.0626018910987936, "grad_norm": 0.10912717878818512, "learning_rate": 2.4329828146074095e-07, "loss": 1.331, "step": 96 }, { "epoch": 0.06455820019563091, "grad_norm": 0.143110990524292, "learning_rate": 1.522932452260595e-08, "loss": 1.3777, "step": 99 }, { "epoch": 0.06455820019563091, "eval_loss": 1.3658305406570435, "eval_runtime": 35.5981, "eval_samples_per_second": 72.56, "eval_steps_per_second": 9.074, "step": 99 } ], "logging_steps": 3, "max_steps": 100, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 9, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3352559257387008.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }