{ "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "eval_steps": 300, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 10.0, "grad_norm": 10.267565727233887, "learning_rate": 4.5e-05, "loss": 2.6587, "step": 300 }, { "epoch": 10.0, "eval_loss": 2.672071695327759, "eval_runtime": 2.1303, "eval_samples_per_second": 336.103, "eval_steps_per_second": 3.755, "step": 300 }, { "epoch": 20.0, "grad_norm": 5.43134069442749, "learning_rate": 4e-05, "loss": 0.5242, "step": 600 }, { "epoch": 20.0, "eval_loss": 1.995083212852478, "eval_runtime": 2.183, "eval_samples_per_second": 327.995, "eval_steps_per_second": 3.665, "step": 600 }, { "epoch": 30.0, "grad_norm": 2.2661261558532715, "learning_rate": 3.5e-05, "loss": 0.1995, "step": 900 }, { "epoch": 30.0, "eval_loss": 1.776659369468689, "eval_runtime": 2.105, "eval_samples_per_second": 340.146, "eval_steps_per_second": 3.801, "step": 900 }, { "epoch": 40.0, "grad_norm": 1.7297999858856201, "learning_rate": 3e-05, "loss": 0.1025, "step": 1200 }, { "epoch": 40.0, "eval_loss": 1.6002683639526367, "eval_runtime": 2.1049, "eval_samples_per_second": 340.159, "eval_steps_per_second": 3.801, "step": 1200 }, { "epoch": 50.0, "grad_norm": 1.119903326034546, "learning_rate": 2.5e-05, "loss": 0.0609, "step": 1500 }, { "epoch": 50.0, "eval_loss": 1.5019861459732056, "eval_runtime": 2.0694, "eval_samples_per_second": 345.998, "eval_steps_per_second": 3.866, "step": 1500 }, { "epoch": 60.0, "grad_norm": 0.4384348690509796, "learning_rate": 2e-05, "loss": 0.042, "step": 1800 }, { "epoch": 60.0, "eval_loss": 1.3371723890304565, "eval_runtime": 2.1069, "eval_samples_per_second": 339.838, "eval_steps_per_second": 3.797, "step": 1800 }, { "epoch": 70.0, "grad_norm": 0.4751300811767578, "learning_rate": 1.5e-05, "loss": 0.0315, "step": 2100 }, { "epoch": 70.0, "eval_loss": 1.3104065656661987, "eval_runtime": 2.0197, "eval_samples_per_second": 354.506, "eval_steps_per_second": 3.961, "step": 2100 }, { "epoch": 80.0, "grad_norm": 1.0900623798370361, "learning_rate": 1e-05, "loss": 0.0271, "step": 2400 }, { "epoch": 80.0, "eval_loss": 1.2714661359786987, "eval_runtime": 2.1021, "eval_samples_per_second": 340.608, "eval_steps_per_second": 3.806, "step": 2400 }, { "epoch": 90.0, "grad_norm": 0.31860601902008057, "learning_rate": 5e-06, "loss": 0.0212, "step": 2700 }, { "epoch": 90.0, "eval_loss": 1.2446495294570923, "eval_runtime": 2.1073, "eval_samples_per_second": 339.765, "eval_steps_per_second": 3.796, "step": 2700 }, { "epoch": 100.0, "grad_norm": 0.22674699127674103, "learning_rate": 0.0, "loss": 0.0202, "step": 3000 }, { "epoch": 100.0, "eval_loss": 1.237874984741211, "eval_runtime": 2.1055, "eval_samples_per_second": 340.065, "eval_steps_per_second": 3.8, "step": 3000 }, { "epoch": 100.0, "step": 3000, "total_flos": 3.1668214733568e+16, "train_loss": 0.36876122029622393, "train_runtime": 1639.2452, "train_samples_per_second": 146.043, "train_steps_per_second": 1.83 } ], "logging_steps": 300, "max_steps": 3000, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.1668214733568e+16, "train_batch_size": 80, "trial_name": null, "trial_params": null }