{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.189873417721519, "eval_steps": 50, "global_step": 30, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.10126582278481013, "grad_norm": 0.8357870578765869, "learning_rate": 5.000000000000001e-07, "loss": 1.4644, "step": 1 }, { "epoch": 0.10126582278481013, "eval_loss": 1.4829777479171753, "eval_runtime": 1.7752, "eval_samples_per_second": 9.576, "eval_steps_per_second": 2.817, "step": 1 }, { "epoch": 0.20253164556962025, "grad_norm": 1.0635546445846558, "learning_rate": 1.0000000000000002e-06, "loss": 1.5019, "step": 2 }, { "epoch": 0.3037974683544304, "grad_norm": 0.7561400532722473, "learning_rate": 1.5e-06, "loss": 1.4153, "step": 3 }, { "epoch": 0.4050632911392405, "grad_norm": 1.0996471643447876, "learning_rate": 2.0000000000000003e-06, "loss": 1.5571, "step": 4 }, { "epoch": 0.5063291139240507, "grad_norm": 0.810637891292572, "learning_rate": 2.5e-06, "loss": 1.4258, "step": 5 }, { "epoch": 0.6075949367088608, "grad_norm": 0.9158858060836792, "learning_rate": 3e-06, "loss": 1.4646, "step": 6 }, { "epoch": 0.7088607594936709, "grad_norm": 0.7619764804840088, "learning_rate": 3.5e-06, "loss": 1.3655, "step": 7 }, { "epoch": 0.810126582278481, "grad_norm": 1.07736337184906, "learning_rate": 4.000000000000001e-06, "loss": 1.5552, "step": 8 }, { "epoch": 0.9113924050632911, "grad_norm": 0.7558680772781372, "learning_rate": 4.5e-06, "loss": 1.3399, "step": 9 }, { "epoch": 1.0632911392405062, "grad_norm": 1.5347477197647095, "learning_rate": 5e-06, "loss": 2.3619, "step": 10 }, { "epoch": 1.1645569620253164, "grad_norm": 0.9588956236839294, "learning_rate": 4.9692208514878445e-06, "loss": 1.5053, "step": 11 }, { "epoch": 1.2658227848101267, "grad_norm": 0.892029881477356, "learning_rate": 4.8776412907378845e-06, "loss": 1.4287, "step": 12 }, { "epoch": 1.3670886075949367, "grad_norm": 0.9654532670974731, "learning_rate": 4.72751631047092e-06, "loss": 1.3541, "step": 13 }, { "epoch": 1.4683544303797469, "grad_norm": 0.872126579284668, "learning_rate": 4.522542485937369e-06, "loss": 1.4782, "step": 14 }, { "epoch": 1.5696202531645569, "grad_norm": 0.8185538649559021, "learning_rate": 4.267766952966369e-06, "loss": 1.4831, "step": 15 }, { "epoch": 1.6708860759493671, "grad_norm": 0.8465214371681213, "learning_rate": 3.969463130731183e-06, "loss": 1.4249, "step": 16 }, { "epoch": 1.7721518987341773, "grad_norm": 1.017401933670044, "learning_rate": 3.634976249348867e-06, "loss": 1.3271, "step": 17 }, { "epoch": 1.8734177215189873, "grad_norm": 0.9488118886947632, "learning_rate": 3.272542485937369e-06, "loss": 1.5254, "step": 18 }, { "epoch": 2.0253164556962027, "grad_norm": 1.628464937210083, "learning_rate": 2.8910861626005774e-06, "loss": 2.3506, "step": 19 }, { "epoch": 2.1265822784810124, "grad_norm": 0.8278865218162537, "learning_rate": 2.5e-06, "loss": 1.2801, "step": 20 }, { "epoch": 2.2278481012658227, "grad_norm": 1.1750155687332153, "learning_rate": 2.1089138373994226e-06, "loss": 1.4173, "step": 21 }, { "epoch": 2.329113924050633, "grad_norm": 0.8267114162445068, "learning_rate": 1.7274575140626318e-06, "loss": 1.395, "step": 22 }, { "epoch": 2.430379746835443, "grad_norm": 1.093450665473938, "learning_rate": 1.3650237506511333e-06, "loss": 1.6499, "step": 23 }, { "epoch": 2.5316455696202533, "grad_norm": 0.8078233599662781, "learning_rate": 1.0305368692688175e-06, "loss": 1.3338, "step": 24 }, { "epoch": 2.632911392405063, "grad_norm": 1.0132853984832764, "learning_rate": 7.322330470336314e-07, "loss": 1.4753, "step": 25 }, { "epoch": 2.7341772151898733, "grad_norm": 0.958588182926178, "learning_rate": 4.774575140626317e-07, "loss": 1.3663, "step": 26 }, { "epoch": 2.8354430379746836, "grad_norm": 1.0243769884109497, "learning_rate": 2.7248368952908055e-07, "loss": 1.5555, "step": 27 }, { "epoch": 2.9367088607594938, "grad_norm": 1.283327579498291, "learning_rate": 1.223587092621162e-07, "loss": 1.9437, "step": 28 }, { "epoch": 3.088607594936709, "grad_norm": 0.9586451053619385, "learning_rate": 3.077914851215585e-08, "loss": 1.7471, "step": 29 }, { "epoch": 3.189873417721519, "grad_norm": 1.0949865579605103, "learning_rate": 0.0, "loss": 1.5175, "step": 30 } ], "logging_steps": 1, "max_steps": 30, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.194232159961088e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }