{ "best_metric": null, "best_model_checkpoint": null, "epoch": 200.0, "eval_steps": 780, "global_step": 7800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 20.0, "grad_norm": 15.68354606628418, "learning_rate": 9e-06, "loss": 2.479, "step": 780 }, { "epoch": 20.0, "eval_loss": 2.5237555503845215, "eval_runtime": 9.7181, "eval_samples_per_second": 256.223, "eval_steps_per_second": 2.573, "step": 780 }, { "epoch": 40.0, "grad_norm": 7.548585891723633, "learning_rate": 8.000000000000001e-06, "loss": 0.5415, "step": 1560 }, { "epoch": 40.0, "eval_loss": 1.9512752294540405, "eval_runtime": 9.4805, "eval_samples_per_second": 262.644, "eval_steps_per_second": 2.637, "step": 1560 }, { "epoch": 60.0, "grad_norm": 4.004195213317871, "learning_rate": 7e-06, "loss": 0.1937, "step": 2340 }, { "epoch": 60.0, "eval_loss": 1.675230622291565, "eval_runtime": 9.4427, "eval_samples_per_second": 263.695, "eval_steps_per_second": 2.648, "step": 2340 }, { "epoch": 80.0, "grad_norm": 1.42832350730896, "learning_rate": 6e-06, "loss": 0.1072, "step": 3120 }, { "epoch": 80.0, "eval_loss": 1.5575916767120361, "eval_runtime": 9.4213, "eval_samples_per_second": 264.296, "eval_steps_per_second": 2.654, "step": 3120 }, { "epoch": 100.0, "grad_norm": 4.648789405822754, "learning_rate": 5e-06, "loss": 0.0722, "step": 3900 }, { "epoch": 100.0, "eval_loss": 1.487786054611206, "eval_runtime": 9.4183, "eval_samples_per_second": 264.378, "eval_steps_per_second": 2.654, "step": 3900 }, { "epoch": 120.0, "grad_norm": 1.3144129514694214, "learning_rate": 4.000000000000001e-06, "loss": 0.0542, "step": 4680 }, { "epoch": 120.0, "eval_loss": 1.418711543083191, "eval_runtime": 9.7326, "eval_samples_per_second": 255.84, "eval_steps_per_second": 2.569, "step": 4680 }, { "epoch": 140.0, "grad_norm": 1.629385232925415, "learning_rate": 3e-06, "loss": 0.0433, "step": 5460 }, { "epoch": 140.0, "eval_loss": 1.3937839269638062, "eval_runtime": 9.4439, "eval_samples_per_second": 263.662, "eval_steps_per_second": 2.647, "step": 5460 }, { "epoch": 160.0, "grad_norm": 1.3994438648223877, "learning_rate": 2.0000000000000003e-06, "loss": 0.0376, "step": 6240 }, { "epoch": 160.0, "eval_loss": 1.3543996810913086, "eval_runtime": 9.4766, "eval_samples_per_second": 262.751, "eval_steps_per_second": 2.638, "step": 6240 }, { "epoch": 180.0, "grad_norm": 1.0756237506866455, "learning_rate": 1.0000000000000002e-06, "loss": 0.0333, "step": 7020 }, { "epoch": 180.0, "eval_loss": 1.3324673175811768, "eval_runtime": 9.5119, "eval_samples_per_second": 261.777, "eval_steps_per_second": 2.628, "step": 7020 }, { "epoch": 200.0, "grad_norm": 0.9140580892562866, "learning_rate": 0.0, "loss": 0.0311, "step": 7800 }, { "epoch": 200.0, "eval_loss": 1.3237212896347046, "eval_runtime": 9.4689, "eval_samples_per_second": 262.967, "eval_steps_per_second": 2.64, "step": 7800 }, { "epoch": 200.0, "step": 7800, "total_flos": 1.3175247232512e+17, "train_loss": 0.35931170732547074, "train_runtime": 4504.8295, "train_samples_per_second": 110.548, "train_steps_per_second": 1.731 } ], "logging_steps": 780, "max_steps": 7800, "num_input_tokens_seen": 0, "num_train_epochs": 200, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.3175247232512e+17, "train_batch_size": 64, "trial_name": null, "trial_params": null }