{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.015715857300015717, "eval_steps": 5, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0003143171460003143, "eval_loss": 13.059508323669434, "eval_runtime": 187.9101, "eval_samples_per_second": 28.514, "eval_steps_per_second": 3.566, "step": 1 }, { "epoch": 0.000942951438000943, "grad_norm": 75.8877944946289, "learning_rate": 3e-05, "loss": 12.881, "step": 3 }, { "epoch": 0.0015715857300015717, "eval_loss": 5.635742664337158, "eval_runtime": 189.3855, "eval_samples_per_second": 28.292, "eval_steps_per_second": 3.538, "step": 5 }, { "epoch": 0.001885902876001886, "grad_norm": 43.98210144042969, "learning_rate": 6e-05, "loss": 7.6928, "step": 6 }, { "epoch": 0.002828854314002829, "grad_norm": 24.16446876525879, "learning_rate": 9e-05, "loss": 2.323, "step": 9 }, { "epoch": 0.0031431714600031434, "eval_loss": 0.45748162269592285, "eval_runtime": 189.1912, "eval_samples_per_second": 28.321, "eval_steps_per_second": 3.541, "step": 10 }, { "epoch": 0.003771805752003772, "grad_norm": 46.70669937133789, "learning_rate": 9.938441702975689e-05, "loss": 0.8778, "step": 12 }, { "epoch": 0.004714757190004715, "grad_norm": 16.055130004882812, "learning_rate": 9.619397662556435e-05, "loss": 0.4203, "step": 15 }, { "epoch": 0.004714757190004715, "eval_loss": 0.4140756130218506, "eval_runtime": 188.9791, "eval_samples_per_second": 28.352, "eval_steps_per_second": 3.545, "step": 15 }, { "epoch": 0.005657708628005658, "grad_norm": 17.71535873413086, "learning_rate": 9.045084971874738e-05, "loss": 0.4977, "step": 18 }, { "epoch": 0.006286342920006287, "eval_loss": 0.5579892992973328, "eval_runtime": 188.9276, "eval_samples_per_second": 28.36, "eval_steps_per_second": 3.546, "step": 20 }, { "epoch": 0.006600660066006601, "grad_norm": 11.61266040802002, "learning_rate": 8.247240241650918e-05, "loss": 0.5078, "step": 21 }, { "epoch": 0.007543611504007544, "grad_norm": 10.140732765197754, "learning_rate": 7.269952498697734e-05, "loss": 0.4071, "step": 24 }, { "epoch": 0.007857928650007858, "eval_loss": 0.35243675112724304, "eval_runtime": 188.9749, "eval_samples_per_second": 28.353, "eval_steps_per_second": 3.545, "step": 25 }, { "epoch": 0.008486562942008486, "grad_norm": 4.260001182556152, "learning_rate": 6.167226819279528e-05, "loss": 0.3264, "step": 27 }, { "epoch": 0.00942951438000943, "grad_norm": 3.704922676086426, "learning_rate": 5e-05, "loss": 0.3148, "step": 30 }, { "epoch": 0.00942951438000943, "eval_loss": 0.2736121416091919, "eval_runtime": 189.0749, "eval_samples_per_second": 28.338, "eval_steps_per_second": 3.544, "step": 30 }, { "epoch": 0.010372465818010372, "grad_norm": 5.416778564453125, "learning_rate": 3.832773180720475e-05, "loss": 0.2178, "step": 33 }, { "epoch": 0.011001100110011002, "eval_loss": 0.25446486473083496, "eval_runtime": 189.0079, "eval_samples_per_second": 28.348, "eval_steps_per_second": 3.545, "step": 35 }, { "epoch": 0.011315417256011316, "grad_norm": 3.151252269744873, "learning_rate": 2.7300475013022663e-05, "loss": 0.2044, "step": 36 }, { "epoch": 0.012258368694012258, "grad_norm": 4.713849067687988, "learning_rate": 1.7527597583490822e-05, "loss": 0.1608, "step": 39 }, { "epoch": 0.012572685840012573, "eval_loss": 0.2181444615125656, "eval_runtime": 189.1182, "eval_samples_per_second": 28.331, "eval_steps_per_second": 3.543, "step": 40 }, { "epoch": 0.013201320132013201, "grad_norm": 5.960506916046143, "learning_rate": 9.549150281252633e-06, "loss": 0.2082, "step": 42 }, { "epoch": 0.014144271570014143, "grad_norm": 5.334136962890625, "learning_rate": 3.8060233744356633e-06, "loss": 0.2383, "step": 45 }, { "epoch": 0.014144271570014143, "eval_loss": 0.21025989949703217, "eval_runtime": 189.1168, "eval_samples_per_second": 28.332, "eval_steps_per_second": 3.543, "step": 45 }, { "epoch": 0.015087223008015087, "grad_norm": 3.7867331504821777, "learning_rate": 6.15582970243117e-07, "loss": 0.2366, "step": 48 }, { "epoch": 0.015715857300015717, "eval_loss": 0.20618359744548798, "eval_runtime": 189.0615, "eval_samples_per_second": 28.34, "eval_steps_per_second": 3.544, "step": 50 } ], "logging_steps": 3, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 5, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.02538779410432e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }