{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.005187798298402158, "eval_steps": 9, "global_step": 25, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00020751193193608634, "grad_norm": 1.4039883613586426, "learning_rate": 1e-05, "loss": 2.1244, "step": 1 }, { "epoch": 0.00020751193193608634, "eval_loss": 2.2763936519622803, "eval_runtime": 237.6501, "eval_samples_per_second": 17.076, "eval_steps_per_second": 2.138, "step": 1 }, { "epoch": 0.0004150238638721727, "grad_norm": 1.3511576652526855, "learning_rate": 2e-05, "loss": 2.3025, "step": 2 }, { "epoch": 0.0006225357958082589, "grad_norm": 1.3691860437393188, "learning_rate": 3e-05, "loss": 2.0968, "step": 3 }, { "epoch": 0.0008300477277443454, "grad_norm": 1.2978947162628174, "learning_rate": 4e-05, "loss": 1.891, "step": 4 }, { "epoch": 0.0010375596596804316, "grad_norm": 1.3064836263656616, "learning_rate": 5e-05, "loss": 2.0951, "step": 5 }, { "epoch": 0.0012450715916165179, "grad_norm": 1.510711431503296, "learning_rate": 6e-05, "loss": 2.1635, "step": 6 }, { "epoch": 0.0014525835235526042, "grad_norm": 1.724186897277832, "learning_rate": 7e-05, "loss": 2.3924, "step": 7 }, { "epoch": 0.0016600954554886907, "grad_norm": 1.3614786863327026, "learning_rate": 8e-05, "loss": 2.1578, "step": 8 }, { "epoch": 0.001867607387424777, "grad_norm": 1.291449785232544, "learning_rate": 9e-05, "loss": 2.0637, "step": 9 }, { "epoch": 0.001867607387424777, "eval_loss": 1.9984328746795654, "eval_runtime": 236.7978, "eval_samples_per_second": 17.137, "eval_steps_per_second": 2.145, "step": 9 }, { "epoch": 0.002075119319360863, "grad_norm": 1.0605844259262085, "learning_rate": 0.0001, "loss": 1.895, "step": 10 }, { "epoch": 0.0022826312512969496, "grad_norm": 1.6267553567886353, "learning_rate": 9.99695413509548e-05, "loss": 2.0418, "step": 11 }, { "epoch": 0.0024901431832330357, "grad_norm": 1.5893021821975708, "learning_rate": 9.987820251299122e-05, "loss": 1.9267, "step": 12 }, { "epoch": 0.0026976551151691223, "grad_norm": 1.1723555326461792, "learning_rate": 9.972609476841367e-05, "loss": 1.8826, "step": 13 }, { "epoch": 0.0029051670471052084, "grad_norm": 1.140472173690796, "learning_rate": 9.951340343707852e-05, "loss": 1.9261, "step": 14 }, { "epoch": 0.003112678979041295, "grad_norm": 1.1380834579467773, "learning_rate": 9.924038765061042e-05, "loss": 1.8419, "step": 15 }, { "epoch": 0.0033201909109773814, "grad_norm": 1.0181565284729004, "learning_rate": 9.890738003669029e-05, "loss": 1.7555, "step": 16 }, { "epoch": 0.0035277028429134675, "grad_norm": 0.8560456037521362, "learning_rate": 9.851478631379982e-05, "loss": 1.7858, "step": 17 }, { "epoch": 0.003735214774849554, "grad_norm": 1.1353284120559692, "learning_rate": 9.806308479691595e-05, "loss": 1.7424, "step": 18 }, { "epoch": 0.003735214774849554, "eval_loss": 1.7517411708831787, "eval_runtime": 236.5855, "eval_samples_per_second": 17.152, "eval_steps_per_second": 2.147, "step": 18 }, { "epoch": 0.00394272670678564, "grad_norm": 1.1942611932754517, "learning_rate": 9.755282581475769e-05, "loss": 1.6209, "step": 19 }, { "epoch": 0.004150238638721726, "grad_norm": 1.261995792388916, "learning_rate": 9.698463103929542e-05, "loss": 1.6995, "step": 20 }, { "epoch": 0.004357750570657813, "grad_norm": 1.0118539333343506, "learning_rate": 9.635919272833938e-05, "loss": 1.698, "step": 21 }, { "epoch": 0.004565262502593899, "grad_norm": 0.9871833920478821, "learning_rate": 9.567727288213005e-05, "loss": 1.7638, "step": 22 }, { "epoch": 0.004772774434529985, "grad_norm": 1.1649303436279297, "learning_rate": 9.493970231495835e-05, "loss": 1.707, "step": 23 }, { "epoch": 0.0049802863664660715, "grad_norm": 1.3631136417388916, "learning_rate": 9.414737964294636e-05, "loss": 1.9811, "step": 24 }, { "epoch": 0.005187798298402158, "grad_norm": 1.030550479888916, "learning_rate": 9.330127018922194e-05, "loss": 1.7537, "step": 25 } ], "logging_steps": 1, "max_steps": 100, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 25, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.0772843939692544e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }