{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9125329935378175, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "learning_rate": 9.757045675413022e-05, "loss": 1.7333, "step": 50 }, { "epoch": 0.15, "learning_rate": 9.514091350826045e-05, "loss": 1.3819, "step": 100 }, { "epoch": 0.22, "learning_rate": 9.271137026239067e-05, "loss": 1.3078, "step": 150 }, { "epoch": 0.29, "learning_rate": 9.02818270165209e-05, "loss": 1.3028, "step": 200 }, { "epoch": 0.36, "learning_rate": 8.785228377065113e-05, "loss": 1.2742, "step": 250 }, { "epoch": 0.44, "learning_rate": 8.542274052478134e-05, "loss": 1.2594, "step": 300 }, { "epoch": 0.51, "learning_rate": 8.299319727891157e-05, "loss": 1.2554, "step": 350 }, { "epoch": 0.58, "learning_rate": 8.056365403304179e-05, "loss": 1.2412, "step": 400 }, { "epoch": 0.66, "learning_rate": 7.8134110787172e-05, "loss": 1.2448, "step": 450 }, { "epoch": 0.73, "learning_rate": 7.570456754130224e-05, "loss": 1.2325, "step": 500 }, { "epoch": 0.8, "learning_rate": 7.327502429543247e-05, "loss": 1.2233, "step": 550 }, { "epoch": 0.87, "learning_rate": 7.08454810495627e-05, "loss": 1.2326, "step": 600 }, { "epoch": 0.95, "learning_rate": 6.841593780369291e-05, "loss": 1.2112, "step": 650 }, { "epoch": 1.02, "learning_rate": 6.598639455782313e-05, "loss": 1.2144, "step": 700 }, { "epoch": 1.09, "learning_rate": 6.355685131195336e-05, "loss": 1.1948, "step": 750 }, { "epoch": 1.17, "learning_rate": 6.112730806608357e-05, "loss": 1.2022, "step": 800 }, { "epoch": 1.24, "learning_rate": 5.8697764820213804e-05, "loss": 1.2028, "step": 850 }, { "epoch": 1.31, "learning_rate": 5.626822157434403e-05, "loss": 1.1978, "step": 900 }, { "epoch": 1.38, "learning_rate": 5.3838678328474244e-05, "loss": 1.1879, "step": 950 }, { "epoch": 1.46, "learning_rate": 5.1409135082604474e-05, "loss": 1.188, "step": 1000 }, { "epoch": 1.53, "learning_rate": 4.89795918367347e-05, "loss": 1.1836, "step": 1050 }, { "epoch": 1.6, "learning_rate": 4.655004859086492e-05, "loss": 1.1794, "step": 1100 }, { "epoch": 1.67, "learning_rate": 4.412050534499514e-05, "loss": 1.1815, "step": 1150 }, { "epoch": 1.75, "learning_rate": 4.1690962099125366e-05, "loss": 1.1856, "step": 1200 }, { "epoch": 1.82, "learning_rate": 3.926141885325559e-05, "loss": 1.1747, "step": 1250 }, { "epoch": 1.89, "learning_rate": 3.683187560738581e-05, "loss": 1.1736, "step": 1300 }, { "epoch": 1.97, "learning_rate": 3.4402332361516035e-05, "loss": 1.1899, "step": 1350 }, { "epoch": 2.04, "learning_rate": 3.1972789115646265e-05, "loss": 1.1726, "step": 1400 }, { "epoch": 2.11, "learning_rate": 2.954324586977648e-05, "loss": 1.1544, "step": 1450 }, { "epoch": 2.18, "learning_rate": 2.7113702623906705e-05, "loss": 1.1568, "step": 1500 }, { "epoch": 2.26, "learning_rate": 2.468415937803693e-05, "loss": 1.1684, "step": 1550 }, { "epoch": 2.33, "learning_rate": 2.225461613216715e-05, "loss": 1.1629, "step": 1600 }, { "epoch": 2.4, "learning_rate": 1.9825072886297377e-05, "loss": 1.1558, "step": 1650 }, { "epoch": 2.48, "learning_rate": 1.73955296404276e-05, "loss": 1.1608, "step": 1700 }, { "epoch": 2.55, "learning_rate": 1.4965986394557824e-05, "loss": 1.155, "step": 1750 }, { "epoch": 2.62, "learning_rate": 1.2536443148688048e-05, "loss": 1.153, "step": 1800 }, { "epoch": 2.69, "learning_rate": 1.010689990281827e-05, "loss": 1.1597, "step": 1850 }, { "epoch": 2.77, "learning_rate": 7.677356656948493e-06, "loss": 1.1517, "step": 1900 }, { "epoch": 2.84, "learning_rate": 5.247813411078718e-06, "loss": 1.1488, "step": 1950 }, { "epoch": 2.91, "learning_rate": 2.818270165208941e-06, "loss": 1.1602, "step": 2000 } ], "max_steps": 2058, "num_train_epochs": 3, "total_flos": 7.611953410899763e+17, "trial_name": null, "trial_params": null }