{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.0410958904109589, "eval_steps": 6, "global_step": 25, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0016438356164383563, "grad_norm": 18.2674560546875, "learning_rate": 1e-05, "loss": 7.2877, "step": 1 }, { "epoch": 0.0016438356164383563, "eval_loss": 7.202574729919434, "eval_runtime": 400.2611, "eval_samples_per_second": 1.921, "eval_steps_per_second": 0.482, "step": 1 }, { "epoch": 0.0032876712328767125, "grad_norm": 17.67232894897461, "learning_rate": 2e-05, "loss": 7.1103, "step": 2 }, { "epoch": 0.004931506849315068, "grad_norm": 18.076366424560547, "learning_rate": 3e-05, "loss": 6.7461, "step": 3 }, { "epoch": 0.006575342465753425, "grad_norm": 16.23063850402832, "learning_rate": 4e-05, "loss": 6.8808, "step": 4 }, { "epoch": 0.00821917808219178, "grad_norm": 15.608308792114258, "learning_rate": 5e-05, "loss": 6.2408, "step": 5 }, { "epoch": 0.009863013698630137, "grad_norm": 15.950782775878906, "learning_rate": 6e-05, "loss": 5.2182, "step": 6 }, { "epoch": 0.009863013698630137, "eval_loss": 3.6735994815826416, "eval_runtime": 401.6303, "eval_samples_per_second": 1.915, "eval_steps_per_second": 0.481, "step": 6 }, { "epoch": 0.011506849315068493, "grad_norm": 17.16571044921875, "learning_rate": 7e-05, "loss": 3.5523, "step": 7 }, { "epoch": 0.01315068493150685, "grad_norm": 15.576335906982422, "learning_rate": 8e-05, "loss": 2.116, "step": 8 }, { "epoch": 0.014794520547945205, "grad_norm": 8.430649757385254, "learning_rate": 9e-05, "loss": 1.4004, "step": 9 }, { "epoch": 0.01643835616438356, "grad_norm": 4.160114288330078, "learning_rate": 0.0001, "loss": 0.9875, "step": 10 }, { "epoch": 0.01808219178082192, "grad_norm": 4.697726726531982, "learning_rate": 9.98458666866564e-05, "loss": 0.8455, "step": 11 }, { "epoch": 0.019726027397260273, "grad_norm": 3.292222499847412, "learning_rate": 9.938441702975689e-05, "loss": 0.7364, "step": 12 }, { "epoch": 0.019726027397260273, "eval_loss": 0.734883189201355, "eval_runtime": 401.7356, "eval_samples_per_second": 1.914, "eval_steps_per_second": 0.48, "step": 12 }, { "epoch": 0.021369863013698632, "grad_norm": 3.199315071105957, "learning_rate": 9.861849601988383e-05, "loss": 0.7187, "step": 13 }, { "epoch": 0.023013698630136987, "grad_norm": 3.5907278060913086, "learning_rate": 9.755282581475769e-05, "loss": 0.7173, "step": 14 }, { "epoch": 0.024657534246575342, "grad_norm": 9.821634292602539, "learning_rate": 9.619397662556435e-05, "loss": 0.9381, "step": 15 }, { "epoch": 0.0263013698630137, "grad_norm": 3.7522168159484863, "learning_rate": 9.45503262094184e-05, "loss": 0.744, "step": 16 }, { "epoch": 0.027945205479452055, "grad_norm": 2.0294668674468994, "learning_rate": 9.263200821770461e-05, "loss": 0.6902, "step": 17 }, { "epoch": 0.02958904109589041, "grad_norm": 2.3452353477478027, "learning_rate": 9.045084971874738e-05, "loss": 0.729, "step": 18 }, { "epoch": 0.02958904109589041, "eval_loss": 0.7356500029563904, "eval_runtime": 401.8552, "eval_samples_per_second": 1.914, "eval_steps_per_second": 0.48, "step": 18 }, { "epoch": 0.03123287671232877, "grad_norm": 3.9628705978393555, "learning_rate": 8.802029828000156e-05, "loss": 0.7545, "step": 19 }, { "epoch": 0.03287671232876712, "grad_norm": 1.9698278903961182, "learning_rate": 8.535533905932738e-05, "loss": 0.6791, "step": 20 }, { "epoch": 0.03452054794520548, "grad_norm": 1.0390818119049072, "learning_rate": 8.247240241650918e-05, "loss": 0.6828, "step": 21 }, { "epoch": 0.03616438356164384, "grad_norm": 3.9402551651000977, "learning_rate": 7.938926261462366e-05, "loss": 0.7803, "step": 22 }, { "epoch": 0.03780821917808219, "grad_norm": 6.37379264831543, "learning_rate": 7.612492823579745e-05, "loss": 0.883, "step": 23 }, { "epoch": 0.03945205479452055, "grad_norm": 3.2149131298065186, "learning_rate": 7.269952498697734e-05, "loss": 0.7417, "step": 24 }, { "epoch": 0.03945205479452055, "eval_loss": 0.7024959921836853, "eval_runtime": 401.4676, "eval_samples_per_second": 1.915, "eval_steps_per_second": 0.481, "step": 24 }, { "epoch": 0.0410958904109589, "grad_norm": 2.5781383514404297, "learning_rate": 6.91341716182545e-05, "loss": 0.7425, "step": 25 } ], "logging_steps": 1, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 25, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.119015678246912e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }