{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9645776566757496, "eval_steps": 500, "global_step": 135, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.21798365122615804, "grad_norm": 2.1800439383266883, "learning_rate": 5e-06, "loss": 1.0138, "step": 10 }, { "epoch": 0.4359673024523161, "grad_norm": 2.321066726127001, "learning_rate": 5e-06, "loss": 0.9311, "step": 20 }, { "epoch": 0.6539509536784741, "grad_norm": 1.2528230223771615, "learning_rate": 5e-06, "loss": 0.902, "step": 30 }, { "epoch": 0.8719346049046321, "grad_norm": 1.3255206188515178, "learning_rate": 5e-06, "loss": 0.8817, "step": 40 }, { "epoch": 0.9809264305177112, "eval_loss": 0.8728658556938171, "eval_runtime": 32.8266, "eval_samples_per_second": 37.652, "eval_steps_per_second": 0.609, "step": 45 }, { "epoch": 1.1008174386920981, "grad_norm": 1.1084771353250091, "learning_rate": 5e-06, "loss": 0.9119, "step": 50 }, { "epoch": 1.318801089918256, "grad_norm": 1.1331211841837239, "learning_rate": 5e-06, "loss": 0.8201, "step": 60 }, { "epoch": 1.5367847411444142, "grad_norm": 0.7209847897858535, "learning_rate": 5e-06, "loss": 0.8167, "step": 70 }, { "epoch": 1.7547683923705724, "grad_norm": 0.6424432226183309, "learning_rate": 5e-06, "loss": 0.8136, "step": 80 }, { "epoch": 1.9727520435967303, "grad_norm": 0.6477980910912198, "learning_rate": 5e-06, "loss": 0.8115, "step": 90 }, { "epoch": 1.9727520435967303, "eval_loss": 0.8485425710678101, "eval_runtime": 32.0874, "eval_samples_per_second": 38.52, "eval_steps_per_second": 0.623, "step": 90 }, { "epoch": 2.2016348773841963, "grad_norm": 0.9029436230736683, "learning_rate": 5e-06, "loss": 0.8127, "step": 100 }, { "epoch": 2.4196185286103544, "grad_norm": 0.6308547054658898, "learning_rate": 5e-06, "loss": 0.746, "step": 110 }, { "epoch": 2.637602179836512, "grad_norm": 0.6233413271157902, "learning_rate": 5e-06, "loss": 0.7542, "step": 120 }, { "epoch": 2.8555858310626703, "grad_norm": 0.6392219564241265, "learning_rate": 5e-06, "loss": 0.7547, "step": 130 }, { "epoch": 2.9645776566757496, "eval_loss": 0.8474076986312866, "eval_runtime": 32.1945, "eval_samples_per_second": 38.392, "eval_steps_per_second": 0.621, "step": 135 }, { "epoch": 2.9645776566757496, "step": 135, "total_flos": 225920648478720.0, "train_loss": 0.840225980899952, "train_runtime": 4572.4428, "train_samples_per_second": 15.402, "train_steps_per_second": 0.03 } ], "logging_steps": 10, "max_steps": 135, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 225920648478720.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }