{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 1.0, "global_step": 14, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14285714285714285, "grad_norm": 203.78257751464844, "learning_rate": 4.642857142857143e-05, "loss": 0.8088, "step": 1 }, { "epoch": 0.14285714285714285, "eval_loss": 1.0832017660140991, "eval_runtime": 4.4764, "eval_samples_per_second": 2.234, "eval_steps_per_second": 1.117, "step": 1 }, { "epoch": 0.2857142857142857, "grad_norm": 551.1321411132812, "learning_rate": 4.2857142857142856e-05, "loss": 1.5034, "step": 2 }, { "epoch": 0.2857142857142857, "eval_loss": 0.8266311883926392, "eval_runtime": 2.9969, "eval_samples_per_second": 3.337, "eval_steps_per_second": 1.668, "step": 2 }, { "epoch": 0.42857142857142855, "grad_norm": 342.6697998046875, "learning_rate": 3.928571428571429e-05, "loss": 1.617, "step": 3 }, { "epoch": 0.42857142857142855, "eval_loss": 0.7760497331619263, "eval_runtime": 2.9732, "eval_samples_per_second": 3.363, "eval_steps_per_second": 1.682, "step": 3 }, { "epoch": 0.5714285714285714, "grad_norm": 792.9654541015625, "learning_rate": 3.571428571428572e-05, "loss": 2.2375, "step": 4 }, { "epoch": 0.5714285714285714, "eval_loss": 0.7384463548660278, "eval_runtime": 3.1473, "eval_samples_per_second": 3.177, "eval_steps_per_second": 1.589, "step": 4 }, { "epoch": 0.7142857142857143, "grad_norm": 210.89300537109375, "learning_rate": 3.2142857142857144e-05, "loss": 1.5411, "step": 5 }, { "epoch": 0.7142857142857143, "eval_loss": 0.82770174741745, "eval_runtime": 3.0122, "eval_samples_per_second": 3.32, "eval_steps_per_second": 1.66, "step": 5 }, { "epoch": 0.8571428571428571, "grad_norm": 314.6759338378906, "learning_rate": 2.857142857142857e-05, "loss": 2.4631, "step": 6 }, { "epoch": 0.8571428571428571, "eval_loss": 0.44329363107681274, "eval_runtime": 4.1331, "eval_samples_per_second": 2.42, "eval_steps_per_second": 1.21, "step": 6 }, { "epoch": 1.0, "grad_norm": 17.239919662475586, "learning_rate": 2.5e-05, "loss": 0.0217, "step": 7 }, { "epoch": 1.0, "eval_loss": 0.9471458196640015, "eval_runtime": 3.013, "eval_samples_per_second": 3.319, "eval_steps_per_second": 1.659, "step": 7 }, { "epoch": 1.1428571428571428, "grad_norm": 213.36294555664062, "learning_rate": 2.1428571428571428e-05, "loss": 2.4217, "step": 8 }, { "epoch": 1.1428571428571428, "eval_loss": 0.9182891845703125, "eval_runtime": 2.9638, "eval_samples_per_second": 3.374, "eval_steps_per_second": 1.687, "step": 8 }, { "epoch": 1.2857142857142856, "grad_norm": 200.6674346923828, "learning_rate": 1.785714285714286e-05, "loss": 2.0588, "step": 9 }, { "epoch": 1.2857142857142856, "eval_loss": 0.7376034259796143, "eval_runtime": 2.958, "eval_samples_per_second": 3.381, "eval_steps_per_second": 1.69, "step": 9 }, { "epoch": 1.4285714285714286, "grad_norm": 215.76296997070312, "learning_rate": 1.4285714285714285e-05, "loss": 1.6484, "step": 10 }, { "epoch": 1.4285714285714286, "eval_loss": 0.5826703906059265, "eval_runtime": 2.9863, "eval_samples_per_second": 3.349, "eval_steps_per_second": 1.674, "step": 10 }, { "epoch": 1.5714285714285714, "grad_norm": 122.98780059814453, "learning_rate": 1.0714285714285714e-05, "loss": 0.9379, "step": 11 }, { "epoch": 1.5714285714285714, "eval_loss": 0.5854327082633972, "eval_runtime": 2.9738, "eval_samples_per_second": 3.363, "eval_steps_per_second": 1.681, "step": 11 }, { "epoch": 1.7142857142857144, "grad_norm": 110.82337951660156, "learning_rate": 7.142857142857143e-06, "loss": 0.8608, "step": 12 }, { "epoch": 1.7142857142857144, "eval_loss": 0.5832847952842712, "eval_runtime": 6.4862, "eval_samples_per_second": 1.542, "eval_steps_per_second": 0.771, "step": 12 }, { "epoch": 1.8571428571428572, "grad_norm": 91.99958801269531, "learning_rate": 3.5714285714285714e-06, "loss": 0.958, "step": 13 }, { "epoch": 1.8571428571428572, "eval_loss": 0.5970481634140015, "eval_runtime": 2.9762, "eval_samples_per_second": 3.36, "eval_steps_per_second": 1.68, "step": 13 }, { "epoch": 2.0, "grad_norm": 28.6821346282959, "learning_rate": 0.0, "loss": 0.0355, "step": 14 }, { "epoch": 2.0, "eval_loss": 0.5955778956413269, "eval_runtime": 5.3785, "eval_samples_per_second": 1.859, "eval_steps_per_second": 0.93, "step": 14 }, { "epoch": 2.0, "step": 14, "total_flos": 5816699796600.0, "train_loss": 1.3652541448495217, "train_runtime": 216.8469, "train_samples_per_second": 0.461, "train_steps_per_second": 0.065 } ], "logging_steps": 1.0, "max_steps": 14, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5816699796600.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }