{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.006348668895754857, "eval_steps": 6, "global_step": 25, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0002539467558301943, "grad_norm": 2.3970961570739746, "learning_rate": 1e-05, "loss": 66.6603, "step": 1 }, { "epoch": 0.0002539467558301943, "eval_loss": 11.107478141784668, "eval_runtime": 18.8123, "eval_samples_per_second": 264.454, "eval_steps_per_second": 66.127, "step": 1 }, { "epoch": 0.0005078935116603886, "grad_norm": 2.4752585887908936, "learning_rate": 2e-05, "loss": 66.6705, "step": 2 }, { "epoch": 0.0007618402674905828, "grad_norm": 2.5148186683654785, "learning_rate": 3e-05, "loss": 66.6682, "step": 3 }, { "epoch": 0.0010157870233207772, "grad_norm": 2.3080971240997314, "learning_rate": 4e-05, "loss": 66.5837, "step": 4 }, { "epoch": 0.0012697337791509713, "grad_norm": 2.354560613632202, "learning_rate": 5e-05, "loss": 66.6436, "step": 5 }, { "epoch": 0.0015236805349811656, "grad_norm": 2.5473434925079346, "learning_rate": 6e-05, "loss": 66.6068, "step": 6 }, { "epoch": 0.0015236805349811656, "eval_loss": 11.101515769958496, "eval_runtime": 18.1722, "eval_samples_per_second": 273.77, "eval_steps_per_second": 68.456, "step": 6 }, { "epoch": 0.00177762729081136, "grad_norm": 2.5556399822235107, "learning_rate": 7e-05, "loss": 66.6045, "step": 7 }, { "epoch": 0.0020315740466415543, "grad_norm": 2.6539461612701416, "learning_rate": 8e-05, "loss": 66.6228, "step": 8 }, { "epoch": 0.0022855208024717484, "grad_norm": 2.602025032043457, "learning_rate": 9e-05, "loss": 66.4995, "step": 9 }, { "epoch": 0.0025394675583019426, "grad_norm": 2.4404804706573486, "learning_rate": 0.0001, "loss": 66.5529, "step": 10 }, { "epoch": 0.002793414314132137, "grad_norm": 2.2321112155914307, "learning_rate": 9.98458666866564e-05, "loss": 66.4916, "step": 11 }, { "epoch": 0.0030473610699623312, "grad_norm": 2.796868085861206, "learning_rate": 9.938441702975689e-05, "loss": 66.4966, "step": 12 }, { "epoch": 0.0030473610699623312, "eval_loss": 11.079522132873535, "eval_runtime": 18.1003, "eval_samples_per_second": 274.858, "eval_steps_per_second": 68.728, "step": 12 }, { "epoch": 0.0033013078257925254, "grad_norm": 2.591143846511841, "learning_rate": 9.861849601988383e-05, "loss": 66.4451, "step": 13 }, { "epoch": 0.00355525458162272, "grad_norm": 2.323777675628662, "learning_rate": 9.755282581475769e-05, "loss": 66.4647, "step": 14 }, { "epoch": 0.003809201337452914, "grad_norm": 2.9256598949432373, "learning_rate": 9.619397662556435e-05, "loss": 66.3829, "step": 15 }, { "epoch": 0.004063148093283109, "grad_norm": 2.4250593185424805, "learning_rate": 9.45503262094184e-05, "loss": 66.4283, "step": 16 }, { "epoch": 0.004317094849113303, "grad_norm": 2.607290029525757, "learning_rate": 9.263200821770461e-05, "loss": 66.2636, "step": 17 }, { "epoch": 0.004571041604943497, "grad_norm": 2.8356802463531494, "learning_rate": 9.045084971874738e-05, "loss": 66.2697, "step": 18 }, { "epoch": 0.004571041604943497, "eval_loss": 11.051901817321777, "eval_runtime": 18.1634, "eval_samples_per_second": 273.902, "eval_steps_per_second": 68.489, "step": 18 }, { "epoch": 0.004824988360773691, "grad_norm": 2.4801783561706543, "learning_rate": 8.802029828000156e-05, "loss": 66.3638, "step": 19 }, { "epoch": 0.005078935116603885, "grad_norm": 2.5052285194396973, "learning_rate": 8.535533905932738e-05, "loss": 66.2984, "step": 20 }, { "epoch": 0.005332881872434079, "grad_norm": 2.5191738605499268, "learning_rate": 8.247240241650918e-05, "loss": 66.3049, "step": 21 }, { "epoch": 0.005586828628264274, "grad_norm": 2.384291887283325, "learning_rate": 7.938926261462366e-05, "loss": 66.2243, "step": 22 }, { "epoch": 0.005840775384094468, "grad_norm": 2.3815808296203613, "learning_rate": 7.612492823579745e-05, "loss": 66.3124, "step": 23 }, { "epoch": 0.0060947221399246625, "grad_norm": 2.601182222366333, "learning_rate": 7.269952498697734e-05, "loss": 66.2157, "step": 24 }, { "epoch": 0.0060947221399246625, "eval_loss": 11.026150703430176, "eval_runtime": 18.1547, "eval_samples_per_second": 274.034, "eval_steps_per_second": 68.522, "step": 24 }, { "epoch": 0.006348668895754857, "grad_norm": 2.959272861480713, "learning_rate": 6.91341716182545e-05, "loss": 66.0543, "step": 25 } ], "logging_steps": 1, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 25, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3572917862400.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }