{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.0039148136548700285, "eval_steps": 9, "global_step": 25, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00015659254619480113, "grad_norm": 0.9751595258712769, "learning_rate": 1e-05, "loss": 2.5701, "step": 1 }, { "epoch": 0.00015659254619480113, "eval_loss": 2.5808298587799072, "eval_runtime": 342.618, "eval_samples_per_second": 15.697, "eval_steps_per_second": 1.964, "step": 1 }, { "epoch": 0.00031318509238960227, "grad_norm": 1.1219534873962402, "learning_rate": 2e-05, "loss": 2.1723, "step": 2 }, { "epoch": 0.0004697776385844034, "grad_norm": 0.8830795288085938, "learning_rate": 3e-05, "loss": 2.7015, "step": 3 }, { "epoch": 0.0006263701847792045, "grad_norm": 0.8768100142478943, "learning_rate": 4e-05, "loss": 2.4673, "step": 4 }, { "epoch": 0.0007829627309740056, "grad_norm": 0.871509313583374, "learning_rate": 5e-05, "loss": 2.639, "step": 5 }, { "epoch": 0.0009395552771688068, "grad_norm": 0.8949034214019775, "learning_rate": 6e-05, "loss": 2.8442, "step": 6 }, { "epoch": 0.001096147823363608, "grad_norm": 0.8944017291069031, "learning_rate": 7e-05, "loss": 2.7632, "step": 7 }, { "epoch": 0.001252740369558409, "grad_norm": 0.9067510366439819, "learning_rate": 8e-05, "loss": 2.5252, "step": 8 }, { "epoch": 0.0014093329157532101, "grad_norm": 0.9196743965148926, "learning_rate": 9e-05, "loss": 2.4146, "step": 9 }, { "epoch": 0.0014093329157532101, "eval_loss": 2.459617853164673, "eval_runtime": 341.6715, "eval_samples_per_second": 15.74, "eval_steps_per_second": 1.97, "step": 9 }, { "epoch": 0.0015659254619480112, "grad_norm": 1.0025465488433838, "learning_rate": 0.0001, "loss": 2.6063, "step": 10 }, { "epoch": 0.0017225180081428123, "grad_norm": 0.8671764731407166, "learning_rate": 9.99695413509548e-05, "loss": 2.4126, "step": 11 }, { "epoch": 0.0018791105543376136, "grad_norm": 0.8781599402427673, "learning_rate": 9.987820251299122e-05, "loss": 2.354, "step": 12 }, { "epoch": 0.0020357031005324147, "grad_norm": 0.8639190793037415, "learning_rate": 9.972609476841367e-05, "loss": 2.1836, "step": 13 }, { "epoch": 0.002192295646727216, "grad_norm": 0.8021327257156372, "learning_rate": 9.951340343707852e-05, "loss": 2.554, "step": 14 }, { "epoch": 0.002348888192922017, "grad_norm": 1.0592883825302124, "learning_rate": 9.924038765061042e-05, "loss": 2.1199, "step": 15 }, { "epoch": 0.002505480739116818, "grad_norm": 0.8209556341171265, "learning_rate": 9.890738003669029e-05, "loss": 2.3206, "step": 16 }, { "epoch": 0.002662073285311619, "grad_norm": 0.7151332497596741, "learning_rate": 9.851478631379982e-05, "loss": 2.5804, "step": 17 }, { "epoch": 0.0028186658315064203, "grad_norm": 0.7842516899108887, "learning_rate": 9.806308479691595e-05, "loss": 2.1459, "step": 18 }, { "epoch": 0.0028186658315064203, "eval_loss": 1.9956408739089966, "eval_runtime": 341.649, "eval_samples_per_second": 15.741, "eval_steps_per_second": 1.97, "step": 18 }, { "epoch": 0.0029752583777012216, "grad_norm": 0.8641836643218994, "learning_rate": 9.755282581475769e-05, "loss": 1.7574, "step": 19 }, { "epoch": 0.0031318509238960224, "grad_norm": 0.9240981936454773, "learning_rate": 9.698463103929542e-05, "loss": 1.8377, "step": 20 }, { "epoch": 0.0032884434700908237, "grad_norm": 0.9617156386375427, "learning_rate": 9.635919272833938e-05, "loss": 2.0198, "step": 21 }, { "epoch": 0.0034450360162856246, "grad_norm": 0.9704702496528625, "learning_rate": 9.567727288213005e-05, "loss": 2.1772, "step": 22 }, { "epoch": 0.003601628562480426, "grad_norm": 0.7572294473648071, "learning_rate": 9.493970231495835e-05, "loss": 2.0255, "step": 23 }, { "epoch": 0.003758221108675227, "grad_norm": 0.9574043154716492, "learning_rate": 9.414737964294636e-05, "loss": 1.8707, "step": 24 }, { "epoch": 0.0039148136548700285, "grad_norm": 0.9383586049079895, "learning_rate": 9.330127018922194e-05, "loss": 2.091, "step": 25 } ], "logging_steps": 1, "max_steps": 100, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 25, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5146843860172800.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }