{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.20020782442644267, "global_step": 1150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 6.000000000000001e-07, "loss": 2.8988, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.2000000000000002e-06, "loss": 2.9604, "step": 20 }, { "epoch": 0.01, "learning_rate": 1.8e-06, "loss": 2.8234, "step": 30 }, { "epoch": 0.01, "learning_rate": 2.4000000000000003e-06, "loss": 2.7224, "step": 40 }, { "epoch": 0.01, "learning_rate": 3e-06, "loss": 2.6824, "step": 50 }, { "epoch": 0.01, "learning_rate": 3.6e-06, "loss": 2.5884, "step": 60 }, { "epoch": 0.01, "learning_rate": 4.2000000000000004e-06, "loss": 2.4736, "step": 70 }, { "epoch": 0.01, "learning_rate": 4.800000000000001e-06, "loss": 2.3262, "step": 80 }, { "epoch": 0.02, "learning_rate": 5.4e-06, "loss": 2.3179, "step": 90 }, { "epoch": 0.02, "learning_rate": 6e-06, "loss": 2.3003, "step": 100 }, { "epoch": 0.02, "learning_rate": 6.6e-06, "loss": 2.2296, "step": 110 }, { "epoch": 0.02, "learning_rate": 7.2e-06, "loss": 2.1433, "step": 120 }, { "epoch": 0.02, "learning_rate": 7.8e-06, "loss": 2.1452, "step": 130 }, { "epoch": 0.02, "learning_rate": 8.400000000000001e-06, "loss": 2.078, "step": 140 }, { "epoch": 0.03, "learning_rate": 9e-06, "loss": 2.0396, "step": 150 }, { "epoch": 0.03, "learning_rate": 9.600000000000001e-06, "loss": 2.0853, "step": 160 }, { "epoch": 0.03, "learning_rate": 1.02e-05, "loss": 2.0704, "step": 170 }, { "epoch": 0.03, "learning_rate": 1.08e-05, "loss": 1.9803, "step": 180 }, { "epoch": 0.03, "learning_rate": 1.1400000000000001e-05, "loss": 2.0011, "step": 190 }, { "epoch": 0.03, "learning_rate": 1.2e-05, "loss": 1.93, "step": 200 }, { "epoch": 0.04, "learning_rate": 1.26e-05, "loss": 1.9777, "step": 210 }, { "epoch": 0.04, "learning_rate": 1.32e-05, "loss": 1.8732, "step": 220 }, { "epoch": 0.04, "learning_rate": 1.3800000000000002e-05, "loss": 1.8803, "step": 230 }, { "epoch": 0.04, "learning_rate": 1.44e-05, "loss": 1.8258, "step": 240 }, { "epoch": 0.04, "learning_rate": 1.5e-05, "loss": 1.8486, "step": 250 }, { "epoch": 0.05, "learning_rate": 1.56e-05, "loss": 1.8054, "step": 260 }, { "epoch": 0.05, "learning_rate": 1.62e-05, "loss": 1.8284, "step": 270 }, { "epoch": 0.05, "learning_rate": 1.6800000000000002e-05, "loss": 1.7647, "step": 280 }, { "epoch": 0.05, "learning_rate": 1.74e-05, "loss": 1.7801, "step": 290 }, { "epoch": 0.05, "learning_rate": 1.8e-05, "loss": 1.7765, "step": 300 }, { "epoch": 0.05, "learning_rate": 1.86e-05, "loss": 1.7285, "step": 310 }, { "epoch": 0.06, "learning_rate": 1.9200000000000003e-05, "loss": 1.7652, "step": 320 }, { "epoch": 0.06, "learning_rate": 1.98e-05, "loss": 1.7792, "step": 330 }, { "epoch": 0.06, "learning_rate": 2.04e-05, "loss": 1.7661, "step": 340 }, { "epoch": 0.06, "learning_rate": 2.1e-05, "loss": 1.7171, "step": 350 }, { "epoch": 0.06, "learning_rate": 2.16e-05, "loss": 1.7829, "step": 360 }, { "epoch": 0.06, "learning_rate": 2.22e-05, "loss": 1.752, "step": 370 }, { "epoch": 0.07, "learning_rate": 2.2800000000000002e-05, "loss": 1.6864, "step": 380 }, { "epoch": 0.07, "learning_rate": 2.3400000000000003e-05, "loss": 1.7163, "step": 390 }, { "epoch": 0.07, "learning_rate": 2.4e-05, "loss": 1.7071, "step": 400 }, { "epoch": 0.07, "learning_rate": 2.4599999999999998e-05, "loss": 1.7115, "step": 410 }, { "epoch": 0.07, "learning_rate": 2.52e-05, "loss": 1.7248, "step": 420 }, { "epoch": 0.07, "learning_rate": 2.58e-05, "loss": 1.6862, "step": 430 }, { "epoch": 0.08, "learning_rate": 2.64e-05, "loss": 1.6393, "step": 440 }, { "epoch": 0.08, "learning_rate": 2.7000000000000002e-05, "loss": 1.6815, "step": 450 }, { "epoch": 0.08, "learning_rate": 2.7600000000000003e-05, "loss": 1.6933, "step": 460 }, { "epoch": 0.08, "learning_rate": 2.8199999999999998e-05, "loss": 1.6984, "step": 470 }, { "epoch": 0.08, "learning_rate": 2.88e-05, "loss": 1.6677, "step": 480 }, { "epoch": 0.09, "learning_rate": 2.94e-05, "loss": 1.6683, "step": 490 }, { "epoch": 0.09, "learning_rate": 3e-05, "loss": 1.6234, "step": 500 }, { "epoch": 0.09, "learning_rate": 2.9880000000000002e-05, "loss": 1.6206, "step": 510 }, { "epoch": 0.09, "learning_rate": 2.976e-05, "loss": 1.6712, "step": 520 }, { "epoch": 0.09, "learning_rate": 2.964e-05, "loss": 1.652, "step": 530 }, { "epoch": 0.09, "learning_rate": 2.9520000000000002e-05, "loss": 1.6202, "step": 540 }, { "epoch": 0.1, "learning_rate": 2.94e-05, "loss": 1.6745, "step": 550 }, { "epoch": 0.1, "learning_rate": 2.928e-05, "loss": 1.6772, "step": 560 }, { "epoch": 0.1, "learning_rate": 2.916e-05, "loss": 1.6238, "step": 570 }, { "epoch": 0.1, "learning_rate": 2.904e-05, "loss": 1.5743, "step": 580 }, { "epoch": 0.1, "learning_rate": 2.892e-05, "loss": 1.6013, "step": 590 }, { "epoch": 0.1, "learning_rate": 2.88e-05, "loss": 1.5846, "step": 600 }, { "epoch": 0.11, "learning_rate": 2.868e-05, "loss": 1.6274, "step": 610 }, { "epoch": 0.11, "learning_rate": 2.856e-05, "loss": 1.6018, "step": 620 }, { "epoch": 0.11, "learning_rate": 2.844e-05, "loss": 1.5927, "step": 630 }, { "epoch": 0.11, "learning_rate": 2.832e-05, "loss": 1.5887, "step": 640 }, { "epoch": 0.11, "learning_rate": 2.8199999999999998e-05, "loss": 1.5446, "step": 650 }, { "epoch": 0.11, "learning_rate": 2.8080000000000002e-05, "loss": 1.6083, "step": 660 }, { "epoch": 0.12, "learning_rate": 2.7960000000000003e-05, "loss": 1.575, "step": 670 }, { "epoch": 0.12, "learning_rate": 2.784e-05, "loss": 1.6243, "step": 680 }, { "epoch": 0.12, "learning_rate": 2.7720000000000002e-05, "loss": 1.5957, "step": 690 }, { "epoch": 0.12, "learning_rate": 2.7600000000000003e-05, "loss": 1.5684, "step": 700 }, { "epoch": 0.12, "learning_rate": 2.748e-05, "loss": 1.5778, "step": 710 }, { "epoch": 0.13, "learning_rate": 2.7360000000000002e-05, "loss": 1.6025, "step": 720 }, { "epoch": 0.13, "learning_rate": 2.724e-05, "loss": 1.56, "step": 730 }, { "epoch": 0.13, "learning_rate": 2.712e-05, "loss": 1.5475, "step": 740 }, { "epoch": 0.13, "learning_rate": 2.7000000000000002e-05, "loss": 1.5322, "step": 750 }, { "epoch": 0.13, "learning_rate": 2.688e-05, "loss": 1.5708, "step": 760 }, { "epoch": 0.13, "learning_rate": 2.676e-05, "loss": 1.5688, "step": 770 }, { "epoch": 0.14, "learning_rate": 2.6640000000000002e-05, "loss": 1.5407, "step": 780 }, { "epoch": 0.14, "learning_rate": 2.652e-05, "loss": 1.4945, "step": 790 }, { "epoch": 0.14, "learning_rate": 2.64e-05, "loss": 1.5536, "step": 800 }, { "epoch": 0.14, "learning_rate": 2.628e-05, "loss": 1.5501, "step": 810 }, { "epoch": 0.14, "learning_rate": 2.616e-05, "loss": 1.5184, "step": 820 }, { "epoch": 0.14, "learning_rate": 2.604e-05, "loss": 1.5046, "step": 830 }, { "epoch": 0.15, "learning_rate": 2.592e-05, "loss": 1.5529, "step": 840 }, { "epoch": 0.15, "learning_rate": 2.58e-05, "loss": 1.5006, "step": 850 }, { "epoch": 0.15, "learning_rate": 2.568e-05, "loss": 1.4979, "step": 860 }, { "epoch": 0.15, "learning_rate": 2.556e-05, "loss": 1.5288, "step": 870 }, { "epoch": 0.15, "learning_rate": 2.544e-05, "loss": 1.5924, "step": 880 }, { "epoch": 0.15, "learning_rate": 2.5319999999999998e-05, "loss": 1.5031, "step": 890 }, { "epoch": 0.16, "learning_rate": 2.52e-05, "loss": 1.5628, "step": 900 }, { "epoch": 0.16, "learning_rate": 2.508e-05, "loss": 1.5173, "step": 910 }, { "epoch": 0.16, "learning_rate": 2.4959999999999998e-05, "loss": 1.5159, "step": 920 }, { "epoch": 0.16, "learning_rate": 2.484e-05, "loss": 1.4837, "step": 930 }, { "epoch": 0.16, "learning_rate": 2.472e-05, "loss": 1.528, "step": 940 }, { "epoch": 0.17, "learning_rate": 2.4599999999999998e-05, "loss": 1.5605, "step": 950 }, { "epoch": 0.17, "learning_rate": 2.448e-05, "loss": 1.4884, "step": 960 }, { "epoch": 0.17, "learning_rate": 2.4360000000000004e-05, "loss": 1.5194, "step": 970 }, { "epoch": 0.17, "learning_rate": 2.4240000000000002e-05, "loss": 1.5145, "step": 980 }, { "epoch": 0.17, "learning_rate": 2.4120000000000003e-05, "loss": 1.5127, "step": 990 }, { "epoch": 0.17, "learning_rate": 2.4e-05, "loss": 1.5129, "step": 1000 }, { "epoch": 0.18, "learning_rate": 2.3880000000000002e-05, "loss": 1.4943, "step": 1010 }, { "epoch": 0.18, "learning_rate": 2.3760000000000003e-05, "loss": 1.5653, "step": 1020 }, { "epoch": 0.18, "learning_rate": 2.364e-05, "loss": 1.5496, "step": 1030 }, { "epoch": 0.18, "learning_rate": 2.3520000000000002e-05, "loss": 1.5118, "step": 1040 }, { "epoch": 0.18, "learning_rate": 2.3400000000000003e-05, "loss": 1.4897, "step": 1050 }, { "epoch": 0.18, "learning_rate": 2.328e-05, "loss": 1.5305, "step": 1060 }, { "epoch": 0.19, "learning_rate": 2.3160000000000002e-05, "loss": 1.518, "step": 1070 }, { "epoch": 0.19, "learning_rate": 2.304e-05, "loss": 1.525, "step": 1080 }, { "epoch": 0.19, "learning_rate": 2.292e-05, "loss": 1.4876, "step": 1090 }, { "epoch": 0.19, "learning_rate": 2.2800000000000002e-05, "loss": 1.4853, "step": 1100 }, { "epoch": 0.19, "learning_rate": 2.268e-05, "loss": 1.5047, "step": 1110 }, { "epoch": 0.19, "learning_rate": 2.256e-05, "loss": 1.4842, "step": 1120 }, { "epoch": 0.2, "learning_rate": 2.2440000000000002e-05, "loss": 1.5162, "step": 1130 }, { "epoch": 0.2, "learning_rate": 2.232e-05, "loss": 1.5132, "step": 1140 }, { "epoch": 0.2, "learning_rate": 2.22e-05, "loss": 1.4743, "step": 1150 } ], "max_steps": 3000, "num_train_epochs": 1, "total_flos": 1.93471168118784e+17, "trial_name": null, "trial_params": null }