{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.8928571428571429, "eval_steps": 5, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.017857142857142856, "eval_loss": 0.6113489270210266, "eval_runtime": 3.2137, "eval_samples_per_second": 29.561, "eval_steps_per_second": 3.734, "step": 1 }, { "epoch": 0.05357142857142857, "grad_norm": 0.8834459781646729, "learning_rate": 3e-05, "loss": 0.5885, "step": 3 }, { "epoch": 0.08928571428571429, "eval_loss": 0.4982052445411682, "eval_runtime": 2.6844, "eval_samples_per_second": 35.39, "eval_steps_per_second": 4.47, "step": 5 }, { "epoch": 0.10714285714285714, "grad_norm": 0.6803084015846252, "learning_rate": 6e-05, "loss": 0.4985, "step": 6 }, { "epoch": 0.16071428571428573, "grad_norm": 1.0455479621887207, "learning_rate": 9e-05, "loss": 0.4033, "step": 9 }, { "epoch": 0.17857142857142858, "eval_loss": 0.37860435247421265, "eval_runtime": 2.6972, "eval_samples_per_second": 35.222, "eval_steps_per_second": 4.449, "step": 10 }, { "epoch": 0.21428571428571427, "grad_norm": 0.5007634162902832, "learning_rate": 9.938441702975689e-05, "loss": 0.3331, "step": 12 }, { "epoch": 0.26785714285714285, "grad_norm": 0.4750365614891052, "learning_rate": 9.619397662556435e-05, "loss": 0.3576, "step": 15 }, { "epoch": 0.26785714285714285, "eval_loss": 0.334573894739151, "eval_runtime": 2.6977, "eval_samples_per_second": 35.215, "eval_steps_per_second": 4.448, "step": 15 }, { "epoch": 0.32142857142857145, "grad_norm": 0.47219088673591614, "learning_rate": 9.045084971874738e-05, "loss": 0.2991, "step": 18 }, { "epoch": 0.35714285714285715, "eval_loss": 0.3077447712421417, "eval_runtime": 2.7069, "eval_samples_per_second": 35.095, "eval_steps_per_second": 4.433, "step": 20 }, { "epoch": 0.375, "grad_norm": 0.3391444683074951, "learning_rate": 8.247240241650918e-05, "loss": 0.2959, "step": 21 }, { "epoch": 0.42857142857142855, "grad_norm": 0.41816258430480957, "learning_rate": 7.269952498697734e-05, "loss": 0.2976, "step": 24 }, { "epoch": 0.44642857142857145, "eval_loss": 0.3027045726776123, "eval_runtime": 2.7073, "eval_samples_per_second": 35.091, "eval_steps_per_second": 4.433, "step": 25 }, { "epoch": 0.48214285714285715, "grad_norm": 0.37434259057044983, "learning_rate": 6.167226819279528e-05, "loss": 0.2523, "step": 27 }, { "epoch": 0.5357142857142857, "grad_norm": 0.47235336899757385, "learning_rate": 5e-05, "loss": 0.2896, "step": 30 }, { "epoch": 0.5357142857142857, "eval_loss": 0.29465004801750183, "eval_runtime": 2.7171, "eval_samples_per_second": 34.964, "eval_steps_per_second": 4.417, "step": 30 }, { "epoch": 0.5892857142857143, "grad_norm": 0.3799128830432892, "learning_rate": 3.832773180720475e-05, "loss": 0.2806, "step": 33 }, { "epoch": 0.625, "eval_loss": 0.29076987504959106, "eval_runtime": 2.7183, "eval_samples_per_second": 34.948, "eval_steps_per_second": 4.415, "step": 35 }, { "epoch": 0.6428571428571429, "grad_norm": 0.3711701035499573, "learning_rate": 2.7300475013022663e-05, "loss": 0.2933, "step": 36 }, { "epoch": 0.6964285714285714, "grad_norm": 0.34898632764816284, "learning_rate": 1.7527597583490822e-05, "loss": 0.2671, "step": 39 }, { "epoch": 0.7142857142857143, "eval_loss": 0.28889867663383484, "eval_runtime": 2.708, "eval_samples_per_second": 35.081, "eval_steps_per_second": 4.431, "step": 40 }, { "epoch": 0.75, "grad_norm": 0.3814017176628113, "learning_rate": 9.549150281252633e-06, "loss": 0.27, "step": 42 }, { "epoch": 0.8035714285714286, "grad_norm": 0.3966533839702606, "learning_rate": 3.8060233744356633e-06, "loss": 0.3033, "step": 45 }, { "epoch": 0.8035714285714286, "eval_loss": 0.2880026698112488, "eval_runtime": 2.7104, "eval_samples_per_second": 35.05, "eval_steps_per_second": 4.427, "step": 45 }, { "epoch": 0.8571428571428571, "grad_norm": 0.3969070613384247, "learning_rate": 6.15582970243117e-07, "loss": 0.2683, "step": 48 }, { "epoch": 0.8928571428571429, "eval_loss": 0.2877660393714905, "eval_runtime": 2.7107, "eval_samples_per_second": 35.047, "eval_steps_per_second": 4.427, "step": 50 } ], "logging_steps": 3, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 5, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.3607236534272e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }