{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "global_step": 3460, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.89, "learning_rate": 3.554913294797688e-05, "loss": 2.962, "step": 500 }, { "epoch": 5.78, "learning_rate": 2.1098265895953757e-05, "loss": 2.596, "step": 1000 }, { "epoch": 8.67, "learning_rate": 6.647398843930635e-06, "loss": 2.5038, "step": 1500 }, { "epoch": 10.4, "learning_rate": 2.3988439306358382e-05, "loss": 2.4779, "step": 1800 }, { "epoch": 10.98, "learning_rate": 2.254335260115607e-05, "loss": 2.4677, "step": 1900 }, { "epoch": 11.56, "learning_rate": 2.1098265895953757e-05, "loss": 2.4479, "step": 2000 }, { "epoch": 12.14, "learning_rate": 1.9653179190751446e-05, "loss": 2.4503, "step": 2100 }, { "epoch": 12.72, "learning_rate": 1.8208092485549132e-05, "loss": 2.4222, "step": 2200 }, { "epoch": 13.29, "learning_rate": 1.676300578034682e-05, "loss": 2.4106, "step": 2300 }, { "epoch": 13.87, "learning_rate": 1.531791907514451e-05, "loss": 2.4054, "step": 2400 }, { "epoch": 14.45, "learning_rate": 1.3872832369942197e-05, "loss": 2.4101, "step": 2500 }, { "epoch": 15.03, "learning_rate": 1.2427745664739884e-05, "loss": 2.3919, "step": 2600 }, { "epoch": 15.61, "learning_rate": 1.0982658959537573e-05, "loss": 2.3841, "step": 2700 }, { "epoch": 16.18, "learning_rate": 9.53757225433526e-06, "loss": 2.377, "step": 2800 }, { "epoch": 16.76, "learning_rate": 8.092485549132949e-06, "loss": 2.3716, "step": 2900 }, { "epoch": 17.34, "learning_rate": 6.647398843930635e-06, "loss": 2.3867, "step": 3000 }, { "epoch": 17.92, "learning_rate": 5.202312138728324e-06, "loss": 2.3632, "step": 3100 }, { "epoch": 18.5, "learning_rate": 3.757225433526012e-06, "loss": 2.3578, "step": 3200 }, { "epoch": 19.08, "learning_rate": 2.3121387283236993e-06, "loss": 2.3616, "step": 3300 }, { "epoch": 19.65, "learning_rate": 8.670520231213873e-07, "loss": 2.3635, "step": 3400 }, { "epoch": 20.0, "step": 3460, "total_flos": 5.296559685853962e+17, "train_loss": 1.2214314234739094, "train_runtime": 4297.3848, "train_samples_per_second": 51.399, "train_steps_per_second": 0.805 } ], "max_steps": 3460, "num_train_epochs": 20, "total_flos": 5.296559685853962e+17, "trial_name": null, "trial_params": null }