{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.834862385321101, "eval_steps": 5, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03669724770642202, "eval_loss": 6.940816402435303, "eval_runtime": 0.4894, "eval_samples_per_second": 94.001, "eval_steps_per_second": 12.261, "step": 1 }, { "epoch": 0.11009174311926606, "grad_norm": 0.44857925176620483, "learning_rate": 3e-05, "loss": 6.9418, "step": 3 }, { "epoch": 0.1834862385321101, "eval_loss": 6.936913013458252, "eval_runtime": 0.0633, "eval_samples_per_second": 726.735, "eval_steps_per_second": 94.792, "step": 5 }, { "epoch": 0.22018348623853212, "grad_norm": 0.4511624872684479, "learning_rate": 6e-05, "loss": 6.9388, "step": 6 }, { "epoch": 0.3302752293577982, "grad_norm": 0.44215646386146545, "learning_rate": 9e-05, "loss": 6.9322, "step": 9 }, { "epoch": 0.3669724770642202, "eval_loss": 6.922896862030029, "eval_runtime": 0.0602, "eval_samples_per_second": 763.509, "eval_steps_per_second": 99.588, "step": 10 }, { "epoch": 0.44036697247706424, "grad_norm": 0.4800911843776703, "learning_rate": 9.938441702975689e-05, "loss": 6.9237, "step": 12 }, { "epoch": 0.5504587155963303, "grad_norm": 0.5088721513748169, "learning_rate": 9.619397662556435e-05, "loss": 6.9119, "step": 15 }, { "epoch": 0.5504587155963303, "eval_loss": 6.901125907897949, "eval_runtime": 0.0697, "eval_samples_per_second": 660.168, "eval_steps_per_second": 86.109, "step": 15 }, { "epoch": 0.6605504587155964, "grad_norm": 0.5459818840026855, "learning_rate": 9.045084971874738e-05, "loss": 6.8969, "step": 18 }, { "epoch": 0.7339449541284404, "eval_loss": 6.87958288192749, "eval_runtime": 0.058, "eval_samples_per_second": 793.383, "eval_steps_per_second": 103.485, "step": 20 }, { "epoch": 0.7706422018348624, "grad_norm": 0.5402729511260986, "learning_rate": 8.247240241650918e-05, "loss": 6.8852, "step": 21 }, { "epoch": 0.8807339449541285, "grad_norm": 0.5008237361907959, "learning_rate": 7.269952498697734e-05, "loss": 6.8723, "step": 24 }, { "epoch": 0.9174311926605505, "eval_loss": 6.86152982711792, "eval_runtime": 0.0589, "eval_samples_per_second": 781.084, "eval_steps_per_second": 101.881, "step": 25 }, { "epoch": 0.9908256880733946, "grad_norm": 0.4618155360221863, "learning_rate": 6.167226819279528e-05, "loss": 6.8632, "step": 27 }, { "epoch": 1.1009174311926606, "grad_norm": 0.4284931719303131, "learning_rate": 5e-05, "loss": 8.6369, "step": 30 }, { "epoch": 1.1009174311926606, "eval_loss": 6.850541591644287, "eval_runtime": 0.0613, "eval_samples_per_second": 750.191, "eval_steps_per_second": 97.851, "step": 30 }, { "epoch": 1.2110091743119267, "grad_norm": 0.37953659892082214, "learning_rate": 3.832773180720475e-05, "loss": 6.7564, "step": 33 }, { "epoch": 1.2844036697247707, "eval_loss": 6.844419956207275, "eval_runtime": 0.0605, "eval_samples_per_second": 759.745, "eval_steps_per_second": 99.097, "step": 35 }, { "epoch": 1.3211009174311927, "grad_norm": 0.35859671235084534, "learning_rate": 2.7300475013022663e-05, "loss": 6.8617, "step": 36 }, { "epoch": 1.4311926605504588, "grad_norm": 0.3261173963546753, "learning_rate": 1.7527597583490822e-05, "loss": 6.9499, "step": 39 }, { "epoch": 1.4678899082568808, "eval_loss": 6.84140157699585, "eval_runtime": 0.0596, "eval_samples_per_second": 772.357, "eval_steps_per_second": 100.742, "step": 40 }, { "epoch": 1.5412844036697249, "grad_norm": 0.3164646625518799, "learning_rate": 9.549150281252633e-06, "loss": 6.8576, "step": 42 }, { "epoch": 1.6513761467889907, "grad_norm": 0.3245089054107666, "learning_rate": 3.8060233744356633e-06, "loss": 6.8992, "step": 45 }, { "epoch": 1.6513761467889907, "eval_loss": 6.84040641784668, "eval_runtime": 0.0629, "eval_samples_per_second": 731.69, "eval_steps_per_second": 95.438, "step": 45 }, { "epoch": 1.761467889908257, "grad_norm": 0.30095237493515015, "learning_rate": 6.15582970243117e-07, "loss": 6.7224, "step": 48 }, { "epoch": 1.834862385321101, "eval_loss": 6.840231895446777, "eval_runtime": 0.0584, "eval_samples_per_second": 788.113, "eval_steps_per_second": 102.797, "step": 50 } ], "logging_steps": 3, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 5, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1110703589376.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }