{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.06231967686093479, "eval_steps": 9, "global_step": 54, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.001154068090017311, "eval_loss": 2.0465598106384277, "eval_runtime": 103.0211, "eval_samples_per_second": 14.172, "eval_steps_per_second": 1.776, "step": 1 }, { "epoch": 0.003462204270051933, "grad_norm": 8.097893714904785, "learning_rate": 3e-05, "loss": 8.0719, "step": 3 }, { "epoch": 0.006924408540103866, "grad_norm": 5.514200210571289, "learning_rate": 6e-05, "loss": 8.0211, "step": 6 }, { "epoch": 0.0103866128101558, "grad_norm": 5.214906692504883, "learning_rate": 9e-05, "loss": 8.0571, "step": 9 }, { "epoch": 0.0103866128101558, "eval_loss": 1.9573053121566772, "eval_runtime": 104.5011, "eval_samples_per_second": 13.971, "eval_steps_per_second": 1.751, "step": 9 }, { "epoch": 0.013848817080207732, "grad_norm": 4.61670446395874, "learning_rate": 9.987820251299122e-05, "loss": 7.7237, "step": 12 }, { "epoch": 0.017311021350259664, "grad_norm": 4.776439666748047, "learning_rate": 9.924038765061042e-05, "loss": 7.5723, "step": 15 }, { "epoch": 0.0207732256203116, "grad_norm": 3.863844394683838, "learning_rate": 9.806308479691595e-05, "loss": 7.8625, "step": 18 }, { "epoch": 0.0207732256203116, "eval_loss": 1.938367247581482, "eval_runtime": 104.6138, "eval_samples_per_second": 13.956, "eval_steps_per_second": 1.749, "step": 18 }, { "epoch": 0.024235429890363532, "grad_norm": 3.75036358833313, "learning_rate": 9.635919272833938e-05, "loss": 7.5972, "step": 21 }, { "epoch": 0.027697634160415464, "grad_norm": 3.694056510925293, "learning_rate": 9.414737964294636e-05, "loss": 7.325, "step": 24 }, { "epoch": 0.031159838430467397, "grad_norm": 3.767008066177368, "learning_rate": 9.145187862775209e-05, "loss": 7.8502, "step": 27 }, { "epoch": 0.031159838430467397, "eval_loss": 1.9282969236373901, "eval_runtime": 104.5512, "eval_samples_per_second": 13.964, "eval_steps_per_second": 1.75, "step": 27 }, { "epoch": 0.03462204270051933, "grad_norm": 3.892972230911255, "learning_rate": 8.83022221559489e-05, "loss": 7.737, "step": 30 }, { "epoch": 0.038084246970571264, "grad_norm": 4.205379962921143, "learning_rate": 8.473291852294987e-05, "loss": 7.7768, "step": 33 }, { "epoch": 0.0415464512406232, "grad_norm": 4.217511177062988, "learning_rate": 8.07830737662829e-05, "loss": 7.7462, "step": 36 }, { "epoch": 0.0415464512406232, "eval_loss": 1.9257190227508545, "eval_runtime": 104.5925, "eval_samples_per_second": 13.959, "eval_steps_per_second": 1.75, "step": 36 }, { "epoch": 0.04500865551067513, "grad_norm": 4.240529537200928, "learning_rate": 7.649596321166024e-05, "loss": 7.9651, "step": 39 }, { "epoch": 0.048470859780727064, "grad_norm": 4.575836658477783, "learning_rate": 7.191855733945387e-05, "loss": 7.7481, "step": 42 }, { "epoch": 0.05193306405077899, "grad_norm": 4.351076126098633, "learning_rate": 6.710100716628344e-05, "loss": 7.9443, "step": 45 }, { "epoch": 0.05193306405077899, "eval_loss": 1.9247784614562988, "eval_runtime": 104.5498, "eval_samples_per_second": 13.965, "eval_steps_per_second": 1.75, "step": 45 }, { "epoch": 0.05539526832083093, "grad_norm": 4.322741508483887, "learning_rate": 6.209609477998338e-05, "loss": 7.5892, "step": 48 }, { "epoch": 0.058857472590882864, "grad_norm": 3.949061870574951, "learning_rate": 5.695865504800327e-05, "loss": 7.5119, "step": 51 }, { "epoch": 0.06231967686093479, "grad_norm": 4.710406303405762, "learning_rate": 5.174497483512506e-05, "loss": 7.8128, "step": 54 }, { "epoch": 0.06231967686093479, "eval_loss": 1.9215248823165894, "eval_runtime": 104.5083, "eval_samples_per_second": 13.97, "eval_steps_per_second": 1.751, "step": 54 } ], "logging_steps": 3, "max_steps": 100, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 9, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 7.571542172604826e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }