{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.917808219178082, "eval_steps": 9, "global_step": 108, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.4931506849315068, "grad_norm": 0.10725488513708115, "learning_rate": 4e-05, "loss": 1.419, "step": 9 }, { "epoch": 0.4931506849315068, "eval_runtime": 28.5255, "eval_samples_per_second": 2.209, "eval_steps_per_second": 1.122, "step": 9 }, { "epoch": 0.9863013698630136, "grad_norm": 0.15689760446548462, "learning_rate": 4e-05, "loss": 1.344, "step": 18 }, { "epoch": 0.9863013698630136, "eval_runtime": 28.4389, "eval_samples_per_second": 2.215, "eval_steps_per_second": 1.125, "step": 18 }, { "epoch": 1.4794520547945205, "grad_norm": 0.19748298823833466, "learning_rate": 4e-05, "loss": 1.3077, "step": 27 }, { "epoch": 1.4794520547945205, "eval_runtime": 28.4449, "eval_samples_per_second": 2.215, "eval_steps_per_second": 1.125, "step": 27 }, { "epoch": 1.9726027397260273, "grad_norm": 0.2421897053718567, "learning_rate": 4e-05, "loss": 1.1656, "step": 36 }, { "epoch": 1.9726027397260273, "eval_runtime": 28.4768, "eval_samples_per_second": 2.212, "eval_steps_per_second": 1.124, "step": 36 }, { "epoch": 2.4657534246575343, "grad_norm": 0.30667445063591003, "learning_rate": 4e-05, "loss": 1.0709, "step": 45 }, { "epoch": 2.4657534246575343, "eval_runtime": 28.4429, "eval_samples_per_second": 2.215, "eval_steps_per_second": 1.125, "step": 45 }, { "epoch": 2.958904109589041, "grad_norm": 0.43574613332748413, "learning_rate": 4e-05, "loss": 0.9364, "step": 54 }, { "epoch": 2.958904109589041, "eval_runtime": 28.42, "eval_samples_per_second": 2.217, "eval_steps_per_second": 1.126, "step": 54 }, { "epoch": 3.452054794520548, "grad_norm": 0.5465545058250427, "learning_rate": 4e-05, "loss": 0.7984, "step": 63 }, { "epoch": 3.452054794520548, "eval_runtime": 28.395, "eval_samples_per_second": 2.219, "eval_steps_per_second": 1.127, "step": 63 }, { "epoch": 3.9452054794520546, "grad_norm": 0.3935684561729431, "learning_rate": 4e-05, "loss": 0.6656, "step": 72 }, { "epoch": 3.9452054794520546, "eval_runtime": 28.3262, "eval_samples_per_second": 2.224, "eval_steps_per_second": 1.13, "step": 72 }, { "epoch": 4.438356164383562, "grad_norm": 0.2613831162452698, "learning_rate": 4e-05, "loss": 0.6008, "step": 81 }, { "epoch": 4.438356164383562, "eval_runtime": 28.3252, "eval_samples_per_second": 2.224, "eval_steps_per_second": 1.13, "step": 81 }, { "epoch": 4.931506849315069, "grad_norm": 0.3327048718929291, "learning_rate": 4e-05, "loss": 0.5703, "step": 90 }, { "epoch": 4.931506849315069, "eval_runtime": 28.3611, "eval_samples_per_second": 2.221, "eval_steps_per_second": 1.128, "step": 90 }, { "epoch": 5.424657534246576, "grad_norm": 0.26965251564979553, "learning_rate": 4e-05, "loss": 0.5243, "step": 99 }, { "epoch": 5.424657534246576, "eval_runtime": 28.3252, "eval_samples_per_second": 2.224, "eval_steps_per_second": 1.13, "step": 99 }, { "epoch": 5.917808219178082, "grad_norm": 0.2745685577392578, "learning_rate": 4e-05, "loss": 0.5232, "step": 108 }, { "epoch": 5.917808219178082, "eval_runtime": 28.3761, "eval_samples_per_second": 2.22, "eval_steps_per_second": 1.128, "step": 108 } ], "logging_steps": 9, "max_steps": 108, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8.763528609792e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }