|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.20020782442644267, |
|
"global_step": 1150, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 6.000000000000001e-07, |
|
"loss": 2.8988, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.2000000000000002e-06, |
|
"loss": 2.9604, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.8e-06, |
|
"loss": 2.8234, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"loss": 2.7224, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3e-06, |
|
"loss": 2.6824, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.6e-06, |
|
"loss": 2.5884, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.2000000000000004e-06, |
|
"loss": 2.4736, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.800000000000001e-06, |
|
"loss": 2.3262, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 5.4e-06, |
|
"loss": 2.3179, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 6e-06, |
|
"loss": 2.3003, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 6.6e-06, |
|
"loss": 2.2296, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.2e-06, |
|
"loss": 2.1433, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.8e-06, |
|
"loss": 2.1452, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.400000000000001e-06, |
|
"loss": 2.078, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9e-06, |
|
"loss": 2.0396, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.600000000000001e-06, |
|
"loss": 2.0853, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.02e-05, |
|
"loss": 2.0704, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.08e-05, |
|
"loss": 1.9803, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.1400000000000001e-05, |
|
"loss": 2.0011, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.2e-05, |
|
"loss": 1.93, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.26e-05, |
|
"loss": 1.9777, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.32e-05, |
|
"loss": 1.8732, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.3800000000000002e-05, |
|
"loss": 1.8803, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.44e-05, |
|
"loss": 1.8258, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.5e-05, |
|
"loss": 1.8486, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.56e-05, |
|
"loss": 1.8054, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.62e-05, |
|
"loss": 1.8284, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.6800000000000002e-05, |
|
"loss": 1.7647, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.74e-05, |
|
"loss": 1.7801, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.8e-05, |
|
"loss": 1.7765, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.86e-05, |
|
"loss": 1.7285, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.9200000000000003e-05, |
|
"loss": 1.7652, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.98e-05, |
|
"loss": 1.7792, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.04e-05, |
|
"loss": 1.7661, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.1e-05, |
|
"loss": 1.7171, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.16e-05, |
|
"loss": 1.7829, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.22e-05, |
|
"loss": 1.752, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.2800000000000002e-05, |
|
"loss": 1.6864, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.3400000000000003e-05, |
|
"loss": 1.7163, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.4e-05, |
|
"loss": 1.7071, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.4599999999999998e-05, |
|
"loss": 1.7115, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.52e-05, |
|
"loss": 1.7248, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.58e-05, |
|
"loss": 1.6862, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.64e-05, |
|
"loss": 1.6393, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.7000000000000002e-05, |
|
"loss": 1.6815, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.7600000000000003e-05, |
|
"loss": 1.6933, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.8199999999999998e-05, |
|
"loss": 1.6984, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.88e-05, |
|
"loss": 1.6677, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.94e-05, |
|
"loss": 1.6683, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 3e-05, |
|
"loss": 1.6234, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.9880000000000002e-05, |
|
"loss": 1.6206, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.976e-05, |
|
"loss": 1.6712, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.964e-05, |
|
"loss": 1.652, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.9520000000000002e-05, |
|
"loss": 1.6202, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.94e-05, |
|
"loss": 1.6745, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.928e-05, |
|
"loss": 1.6772, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.916e-05, |
|
"loss": 1.6238, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.904e-05, |
|
"loss": 1.5743, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.892e-05, |
|
"loss": 1.6013, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.88e-05, |
|
"loss": 1.5846, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.868e-05, |
|
"loss": 1.6274, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.856e-05, |
|
"loss": 1.6018, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.844e-05, |
|
"loss": 1.5927, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.832e-05, |
|
"loss": 1.5887, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.8199999999999998e-05, |
|
"loss": 1.5446, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.8080000000000002e-05, |
|
"loss": 1.6083, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.7960000000000003e-05, |
|
"loss": 1.575, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.784e-05, |
|
"loss": 1.6243, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.7720000000000002e-05, |
|
"loss": 1.5957, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.7600000000000003e-05, |
|
"loss": 1.5684, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.748e-05, |
|
"loss": 1.5778, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.7360000000000002e-05, |
|
"loss": 1.6025, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.724e-05, |
|
"loss": 1.56, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.712e-05, |
|
"loss": 1.5475, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.7000000000000002e-05, |
|
"loss": 1.5322, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.688e-05, |
|
"loss": 1.5708, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.676e-05, |
|
"loss": 1.5688, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.6640000000000002e-05, |
|
"loss": 1.5407, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.652e-05, |
|
"loss": 1.4945, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.64e-05, |
|
"loss": 1.5536, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.628e-05, |
|
"loss": 1.5501, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.616e-05, |
|
"loss": 1.5184, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.604e-05, |
|
"loss": 1.5046, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.592e-05, |
|
"loss": 1.5529, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.58e-05, |
|
"loss": 1.5006, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.568e-05, |
|
"loss": 1.4979, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.556e-05, |
|
"loss": 1.5288, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.544e-05, |
|
"loss": 1.5924, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.5319999999999998e-05, |
|
"loss": 1.5031, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.52e-05, |
|
"loss": 1.5628, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.508e-05, |
|
"loss": 1.5173, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.4959999999999998e-05, |
|
"loss": 1.5159, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.484e-05, |
|
"loss": 1.4837, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.472e-05, |
|
"loss": 1.528, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.4599999999999998e-05, |
|
"loss": 1.5605, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.448e-05, |
|
"loss": 1.4884, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.4360000000000004e-05, |
|
"loss": 1.5194, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.4240000000000002e-05, |
|
"loss": 1.5145, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.4120000000000003e-05, |
|
"loss": 1.5127, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.4e-05, |
|
"loss": 1.5129, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.3880000000000002e-05, |
|
"loss": 1.4943, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.3760000000000003e-05, |
|
"loss": 1.5653, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.364e-05, |
|
"loss": 1.5496, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.3520000000000002e-05, |
|
"loss": 1.5118, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.3400000000000003e-05, |
|
"loss": 1.4897, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.328e-05, |
|
"loss": 1.5305, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.3160000000000002e-05, |
|
"loss": 1.518, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.304e-05, |
|
"loss": 1.525, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.292e-05, |
|
"loss": 1.4876, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.2800000000000002e-05, |
|
"loss": 1.4853, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.268e-05, |
|
"loss": 1.5047, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.256e-05, |
|
"loss": 1.4842, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.2440000000000002e-05, |
|
"loss": 1.5162, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.232e-05, |
|
"loss": 1.5132, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.22e-05, |
|
"loss": 1.4743, |
|
"step": 1150 |
|
} |
|
], |
|
"max_steps": 3000, |
|
"num_train_epochs": 1, |
|
"total_flos": 1.93471168118784e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|