{
  "best_metric": 2.670438766479492,
  "best_model_checkpoint": "output/bob-dylan/checkpoint-321",
  "epoch": 1.0,
  "global_step": 321,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02,
      "learning_rate": 0.00013711788223044424,
      "loss": 3.6967,
      "step": 5
    },
    {
      "epoch": 0.03,
      "learning_rate": 0.0001368717255202631,
      "loss": 3.7692,
      "step": 10
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.00013646211919423798,
      "loss": 3.4916,
      "step": 15
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.00013589004389254062,
      "loss": 3.6186,
      "step": 20
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.00013515686922297834,
      "loss": 3.5756,
      "step": 25
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.00013426435048201062,
      "loss": 3.236,
      "step": 30
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.0001332146244523866,
      "loss": 3.3956,
      "step": 35
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.00013201020428746477,
      "loss": 3.3277,
      "step": 40
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.0001306539734944624,
      "loss": 3.2617,
      "step": 45
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.00012914917903103908,
      "loss": 3.554,
      "step": 50
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.00012749942353174222,
      "loss": 3.2625,
      "step": 55
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.00012570865668292503,
      "loss": 3.2986,
      "step": 60
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.0001237811657667863,
      "loss": 3.264,
      "step": 65
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.00012172156539717071,
      "loss": 3.0437,
      "step": 70
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.00011953478647170303,
      "loss": 3.0527,
      "step": 75
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.000117226064366706,
      "loss": 3.3623,
      "step": 80
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.0001148009264031647,
      "loss": 3.0801,
      "step": 85
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.0001122651786137447,
      "loss": 3.0471,
      "step": 90
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.00010962489184254581,
      "loss": 3.051,
      "step": 95
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.00010688638721086951,
      "loss": 3.2191,
      "step": 100
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.0001040562209837965,
      "loss": 2.9343,
      "step": 105
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.00010114116887380613,
      "loss": 2.9893,
      "step": 110
    },
    {
      "epoch": 0.36,
      "learning_rate": 9.814820981901533e-05,
      "loss": 3.1068,
      "step": 115
    },
    {
      "epoch": 0.37,
      "learning_rate": 9.508450927487455e-05,
      "loss": 3.1636,
      "step": 120
    },
    {
      "epoch": 0.39,
      "learning_rate": 9.195740205932179e-05,
      "loss": 3.1864,
      "step": 125
    },
    {
      "epoch": 0.4,
      "learning_rate": 8.877437479246497e-05,
      "loss": 2.9116,
      "step": 130
    },
    {
      "epoch": 0.42,
      "learning_rate": 8.554304797283438e-05,
      "loss": 3.1795,
      "step": 135
    },
    {
      "epoch": 0.44,
      "learning_rate": 8.227115773311617e-05,
      "loss": 2.9074,
      "step": 140
    },
    {
      "epoch": 0.45,
      "learning_rate": 7.896653731904552e-05,
      "loss": 3.1763,
      "step": 145
    },
    {
      "epoch": 0.47,
      "learning_rate": 7.56370983358012e-05,
      "loss": 3.0588,
      "step": 150
    },
    {
      "epoch": 0.48,
      "learning_rate": 7.229081180679942e-05,
      "loss": 2.9373,
      "step": 155
    },
    {
      "epoch": 0.5,
      "learning_rate": 6.893568909023427e-05,
      "loss": 3.092,
      "step": 160
    },
    {
      "epoch": 0.51,
      "learning_rate": 6.557976269905237e-05,
      "loss": 3.0733,
      "step": 165
    },
    {
      "epoch": 0.53,
      "learning_rate": 6.223106707028106e-05,
      "loss": 2.9187,
      "step": 170
    },
    {
      "epoch": 0.55,
      "learning_rate": 5.889761932974993e-05,
      "loss": 2.9659,
      "step": 175
    },
    {
      "epoch": 0.56,
      "learning_rate": 5.5587400098257335e-05,
      "loss": 3.1799,
      "step": 180
    },
    {
      "epoch": 0.58,
      "learning_rate": 5.230833438513365e-05,
      "loss": 2.9959,
      "step": 185
    },
    {
      "epoch": 0.59,
      "learning_rate": 4.9068272614944106e-05,
      "loss": 2.8529,
      "step": 190
    },
    {
      "epoch": 0.61,
      "learning_rate": 4.58749718327555e-05,
      "loss": 3.0337,
      "step": 195
    },
    {
      "epoch": 0.62,
      "learning_rate": 4.2736077132963006e-05,
      "loss": 3.0827,
      "step": 200
    },
    {
      "epoch": 0.64,
      "learning_rate": 3.9659103356138536e-05,
      "loss": 2.8453,
      "step": 205
    },
    {
      "epoch": 0.65,
      "learning_rate": 3.6651417097720435e-05,
      "loss": 2.9619,
      "step": 210
    },
    {
      "epoch": 0.67,
      "learning_rate": 3.372021907161731e-05,
      "loss": 3.0061,
      "step": 215
    },
    {
      "epoch": 0.69,
      "learning_rate": 3.0872526870949537e-05,
      "loss": 2.8687,
      "step": 220
    },
    {
      "epoch": 0.7,
      "learning_rate": 2.8115158167201102e-05,
      "loss": 2.9913,
      "step": 225
    },
    {
      "epoch": 0.72,
      "learning_rate": 2.5454714388004492e-05,
      "loss": 2.9778,
      "step": 230
    },
    {
      "epoch": 0.73,
      "learning_rate": 2.289756491263597e-05,
      "loss": 3.0748,
      "step": 235
    },
    {
      "epoch": 0.75,
      "learning_rate": 2.0449831823058788e-05,
      "loss": 2.7658,
      "step": 240
    },
    {
      "epoch": 0.76,
      "learning_rate": 1.8117375247021725e-05,
      "loss": 2.9792,
      "step": 245
    },
    {
      "epoch": 0.78,
      "learning_rate": 1.5905779328303487e-05,
      "loss": 2.7718,
      "step": 250
    },
    {
      "epoch": 0.79,
      "learning_rate": 1.3820338857691364e-05,
      "loss": 2.9954,
      "step": 255
    },
    {
      "epoch": 0.81,
      "learning_rate": 1.1866046596701035e-05,
      "loss": 2.9246,
      "step": 260
    },
    {
      "epoch": 0.83,
      "learning_rate": 1.0047581324385938e-05,
      "loss": 2.9875,
      "step": 265
    },
    {
      "epoch": 0.84,
      "learning_rate": 8.36929663585326e-06,
      "loss": 2.673,
      "step": 270
    },
    {
      "epoch": 0.86,
      "learning_rate": 6.835210519304257e-06,
      "loss": 2.8515,
      "step": 275
    },
    {
      "epoch": 0.87,
      "learning_rate": 5.448995736552248e-06,
      "loss": 2.9416,
      "step": 280
    },
    {
      "epoch": 0.89,
      "learning_rate": 4.213971030048682e-06,
      "loss": 2.8573,
      "step": 285
    },
    {
      "epoch": 0.9,
      "learning_rate": 3.133093177468323e-06,
      "loss": 2.6045,
      "step": 290
    },
    {
      "epoch": 0.92,
      "learning_rate": 2.208949912875789e-06,
      "loss": 3.0414,
      "step": 295
    },
    {
      "epoch": 0.93,
      "learning_rate": 1.4437537314208725e-06,
      "loss": 2.7666,
      "step": 300
    },
    {
      "epoch": 0.95,
      "learning_rate": 8.39336592394954e-07,
      "loss": 2.7331,
      "step": 305
    },
    {
      "epoch": 0.97,
      "learning_rate": 3.971455333297437e-07,
      "loss": 2.847,
      "step": 310
    },
    {
      "epoch": 0.98,
      "learning_rate": 1.1823920563887646e-07,
      "loss": 2.7128,
      "step": 315
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.2853400962779e-09,
      "loss": 2.6074,
      "step": 320
    },
    {
      "epoch": 1.0,
      "eval_loss": 2.670438766479492,
      "eval_runtime": 18.6137,
      "eval_samples_per_second": 22.833,
      "eval_steps_per_second": 2.901,
      "step": 321
    }
  ],
  "max_steps": 321,
  "num_train_epochs": 1,
  "total_flos": 334715092992000.0,
  "trial_name": null,
  "trial_params": null
}