{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "global_step": 29150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.17, "learning_rate": 4.914236706689537e-05, "loss": 0.5563, "step": 500 }, { "epoch": 0.34, "learning_rate": 4.828473413379074e-05, "loss": 0.3171, "step": 1000 }, { "epoch": 0.51, "learning_rate": 4.742710120068611e-05, "loss": 0.2811, "step": 1500 }, { "epoch": 0.69, "learning_rate": 4.656946826758148e-05, "loss": 0.2556, "step": 2000 }, { "epoch": 0.86, "learning_rate": 4.5711835334476845e-05, "loss": 0.2458, "step": 2500 }, { "epoch": 1.03, "learning_rate": 4.4854202401372214e-05, "loss": 0.2329, "step": 3000 }, { "epoch": 1.2, "learning_rate": 4.399656946826758e-05, "loss": 0.2205, "step": 3500 }, { "epoch": 1.37, "learning_rate": 4.313893653516296e-05, "loss": 0.2172, "step": 4000 }, { "epoch": 1.54, "learning_rate": 4.228130360205832e-05, "loss": 0.2118, "step": 4500 }, { "epoch": 1.72, "learning_rate": 4.142367066895369e-05, "loss": 0.2059, "step": 5000 }, { "epoch": 1.89, "learning_rate": 4.0566037735849064e-05, "loss": 0.2047, "step": 5500 }, { "epoch": 2.06, "learning_rate": 3.9708404802744425e-05, "loss": 0.1974, "step": 6000 }, { "epoch": 2.23, "learning_rate": 3.8850771869639794e-05, "loss": 0.1945, "step": 6500 }, { "epoch": 2.4, "learning_rate": 3.799313893653517e-05, "loss": 0.1941, "step": 7000 }, { "epoch": 2.57, "learning_rate": 3.713550600343053e-05, "loss": 0.1897, "step": 7500 }, { "epoch": 2.74, "learning_rate": 3.62778730703259e-05, "loss": 0.1885, "step": 8000 }, { "epoch": 2.92, "learning_rate": 3.5420240137221275e-05, "loss": 0.1852, "step": 8500 }, { "epoch": 3.09, "learning_rate": 3.456260720411664e-05, "loss": 0.1813, "step": 9000 }, { "epoch": 3.26, "learning_rate": 3.3704974271012005e-05, "loss": 0.1798, "step": 9500 }, { "epoch": 3.43, "learning_rate": 3.284734133790738e-05, "loss": 0.1769, "step": 10000 }, { "epoch": 3.6, "learning_rate": 3.198970840480275e-05, "loss": 0.1765, "step": 10500 }, { "epoch": 3.77, "learning_rate": 3.113207547169811e-05, "loss": 0.1757, "step": 11000 }, { "epoch": 3.95, "learning_rate": 3.0274442538593483e-05, "loss": 0.1726, "step": 11500 }, { "epoch": 4.12, "learning_rate": 2.9416809605488855e-05, "loss": 0.1713, "step": 12000 }, { "epoch": 4.29, "learning_rate": 2.855917667238422e-05, "loss": 0.1704, "step": 12500 }, { "epoch": 4.46, "learning_rate": 2.770154373927959e-05, "loss": 0.1687, "step": 13000 }, { "epoch": 4.63, "learning_rate": 2.684391080617496e-05, "loss": 0.1674, "step": 13500 }, { "epoch": 4.8, "learning_rate": 2.5986277873070326e-05, "loss": 0.1671, "step": 14000 }, { "epoch": 4.97, "learning_rate": 2.5128644939965695e-05, "loss": 0.1657, "step": 14500 }, { "epoch": 5.15, "learning_rate": 2.4271012006861067e-05, "loss": 0.1635, "step": 15000 }, { "epoch": 5.32, "learning_rate": 2.3413379073756435e-05, "loss": 0.1623, "step": 15500 }, { "epoch": 5.49, "learning_rate": 2.25557461406518e-05, "loss": 0.1602, "step": 16000 }, { "epoch": 5.66, "learning_rate": 2.1698113207547172e-05, "loss": 0.1619, "step": 16500 }, { "epoch": 5.83, "learning_rate": 2.084048027444254e-05, "loss": 0.1597, "step": 17000 }, { "epoch": 6.0, "learning_rate": 1.998284734133791e-05, "loss": 0.1587, "step": 17500 }, { "epoch": 6.17, "learning_rate": 1.9125214408233278e-05, "loss": 0.1571, "step": 18000 }, { "epoch": 6.35, "learning_rate": 1.8267581475128647e-05, "loss": 0.1559, "step": 18500 }, { "epoch": 6.52, "learning_rate": 1.7409948542024015e-05, "loss": 0.1569, "step": 19000 }, { "epoch": 6.69, "learning_rate": 1.6552315608919384e-05, "loss": 0.1564, "step": 19500 }, { "epoch": 6.86, "learning_rate": 1.5694682675814752e-05, "loss": 0.1552, "step": 20000 }, { "epoch": 7.03, "learning_rate": 1.4837049742710121e-05, "loss": 0.1533, "step": 20500 }, { "epoch": 7.2, "learning_rate": 1.397941680960549e-05, "loss": 0.1514, "step": 21000 }, { "epoch": 7.38, "learning_rate": 1.312178387650086e-05, "loss": 0.1517, "step": 21500 }, { "epoch": 7.55, "learning_rate": 1.2264150943396227e-05, "loss": 0.1519, "step": 22000 }, { "epoch": 7.72, "learning_rate": 1.1406518010291597e-05, "loss": 0.1514, "step": 22500 }, { "epoch": 7.89, "learning_rate": 1.0548885077186965e-05, "loss": 0.1508, "step": 23000 }, { "epoch": 8.06, "learning_rate": 9.691252144082332e-06, "loss": 0.1489, "step": 23500 }, { "epoch": 8.23, "learning_rate": 8.833619210977703e-06, "loss": 0.1481, "step": 24000 }, { "epoch": 8.4, "learning_rate": 7.975986277873071e-06, "loss": 0.1474, "step": 24500 }, { "epoch": 8.58, "learning_rate": 7.11835334476844e-06, "loss": 0.1484, "step": 25000 }, { "epoch": 8.75, "learning_rate": 6.2607204116638075e-06, "loss": 0.1487, "step": 25500 }, { "epoch": 8.92, "learning_rate": 5.403087478559177e-06, "loss": 0.1458, "step": 26000 }, { "epoch": 9.09, "learning_rate": 4.5454545454545455e-06, "loss": 0.1455, "step": 26500 }, { "epoch": 9.26, "learning_rate": 3.687821612349914e-06, "loss": 0.1468, "step": 27000 }, { "epoch": 9.43, "learning_rate": 2.830188679245283e-06, "loss": 0.1448, "step": 27500 }, { "epoch": 9.61, "learning_rate": 1.972555746140652e-06, "loss": 0.1456, "step": 28000 }, { "epoch": 9.78, "learning_rate": 1.1149228130360207e-06, "loss": 0.1452, "step": 28500 }, { "epoch": 9.95, "learning_rate": 2.572898799313894e-07, "loss": 0.1465, "step": 29000 }, { "epoch": 10.0, "step": 29150, "train_runtime": 7427.3988, "train_samples_per_second": 3.925 } ], "max_steps": 29150, "num_train_epochs": 10, "total_flos": 160253787610644480, "trial_name": null, "trial_params": null }