{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 20.0,
  "eval_steps": 500,
  "global_step": 5940,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "eval_loss": 0.2936731278896332,
      "eval_runtime": 114.2823,
      "eval_samples_per_second": 41.502,
      "eval_steps_per_second": 0.656,
      "step": 297
    },
    {
      "epoch": 1.68,
      "learning_rate": 4.57912457912458e-05,
      "loss": 0.5604,
      "step": 500
    },
    {
      "epoch": 2.0,
      "eval_loss": 0.21221154928207397,
      "eval_runtime": 121.2766,
      "eval_samples_per_second": 39.109,
      "eval_steps_per_second": 0.618,
      "step": 594
    },
    {
      "epoch": 3.0,
      "eval_loss": 0.05269444361329079,
      "eval_runtime": 119.2093,
      "eval_samples_per_second": 39.787,
      "eval_steps_per_second": 0.629,
      "step": 891
    },
    {
      "epoch": 3.37,
      "learning_rate": 4.158249158249159e-05,
      "loss": 0.1318,
      "step": 1000
    },
    {
      "epoch": 4.0,
      "eval_loss": 0.014130596071481705,
      "eval_runtime": 183.4661,
      "eval_samples_per_second": 25.852,
      "eval_steps_per_second": 0.409,
      "step": 1188
    },
    {
      "epoch": 5.0,
      "eval_loss": 0.00778605230152607,
      "eval_runtime": 130.1216,
      "eval_samples_per_second": 36.451,
      "eval_steps_per_second": 0.576,
      "step": 1485
    },
    {
      "epoch": 5.05,
      "learning_rate": 3.7373737373737376e-05,
      "loss": 0.0552,
      "step": 1500
    },
    {
      "epoch": 6.0,
      "eval_loss": 0.004267835058271885,
      "eval_runtime": 130.782,
      "eval_samples_per_second": 36.266,
      "eval_steps_per_second": 0.573,
      "step": 1782
    },
    {
      "epoch": 6.73,
      "learning_rate": 3.3164983164983165e-05,
      "loss": 0.0197,
      "step": 2000
    },
    {
      "epoch": 7.0,
      "eval_loss": 0.002914531622081995,
      "eval_runtime": 126.9043,
      "eval_samples_per_second": 37.375,
      "eval_steps_per_second": 0.591,
      "step": 2079
    },
    {
      "epoch": 8.0,
      "eval_loss": 0.0024584291968494654,
      "eval_runtime": 129.977,
      "eval_samples_per_second": 36.491,
      "eval_steps_per_second": 0.577,
      "step": 2376
    },
    {
      "epoch": 8.42,
      "learning_rate": 2.8956228956228958e-05,
      "loss": 0.0104,
      "step": 2500
    },
    {
      "epoch": 9.0,
      "eval_loss": 0.0008079797262325883,
      "eval_runtime": 124.2007,
      "eval_samples_per_second": 38.188,
      "eval_steps_per_second": 0.604,
      "step": 2673
    },
    {
      "epoch": 10.0,
      "eval_loss": 0.0014460551319643855,
      "eval_runtime": 124.7091,
      "eval_samples_per_second": 38.033,
      "eval_steps_per_second": 0.601,
      "step": 2970
    },
    {
      "epoch": 10.1,
      "learning_rate": 2.474747474747475e-05,
      "loss": 0.0062,
      "step": 3000
    },
    {
      "epoch": 11.0,
      "eval_loss": 0.0016354549443349242,
      "eval_runtime": 126.5389,
      "eval_samples_per_second": 37.483,
      "eval_steps_per_second": 0.593,
      "step": 3267
    },
    {
      "epoch": 11.78,
      "learning_rate": 2.0538720538720542e-05,
      "loss": 0.0045,
      "step": 3500
    },
    {
      "epoch": 12.0,
      "eval_loss": 0.000995765090920031,
      "eval_runtime": 131.3803,
      "eval_samples_per_second": 36.101,
      "eval_steps_per_second": 0.571,
      "step": 3564
    },
    {
      "epoch": 13.0,
      "eval_loss": 0.00040818448178470135,
      "eval_runtime": 136.6658,
      "eval_samples_per_second": 34.705,
      "eval_steps_per_second": 0.549,
      "step": 3861
    },
    {
      "epoch": 13.47,
      "learning_rate": 1.632996632996633e-05,
      "loss": 0.0031,
      "step": 4000
    },
    {
      "epoch": 14.0,
      "eval_loss": 0.0001601761905476451,
      "eval_runtime": 128.2083,
      "eval_samples_per_second": 36.994,
      "eval_steps_per_second": 0.585,
      "step": 4158
    },
    {
      "epoch": 15.0,
      "eval_loss": 0.0001224653678946197,
      "eval_runtime": 104.2251,
      "eval_samples_per_second": 45.507,
      "eval_steps_per_second": 0.72,
      "step": 4455
    },
    {
      "epoch": 15.15,
      "learning_rate": 1.2121212121212122e-05,
      "loss": 0.0017,
      "step": 4500
    },
    {
      "epoch": 16.0,
      "eval_loss": 0.00017290345567744225,
      "eval_runtime": 131.2825,
      "eval_samples_per_second": 36.128,
      "eval_steps_per_second": 0.571,
      "step": 4752
    },
    {
      "epoch": 16.84,
      "learning_rate": 7.912457912457913e-06,
      "loss": 0.0012,
      "step": 5000
    },
    {
      "epoch": 17.0,
      "eval_loss": 4.626844383892603e-05,
      "eval_runtime": 98.0039,
      "eval_samples_per_second": 48.396,
      "eval_steps_per_second": 0.765,
      "step": 5049
    },
    {
      "epoch": 18.0,
      "eval_loss": 7.748394273221493e-05,
      "eval_runtime": 114.2754,
      "eval_samples_per_second": 41.505,
      "eval_steps_per_second": 0.656,
      "step": 5346
    },
    {
      "epoch": 18.52,
      "learning_rate": 3.7037037037037037e-06,
      "loss": 0.0009,
      "step": 5500
    },
    {
      "epoch": 19.0,
      "eval_loss": 3.669047146104276e-05,
      "eval_runtime": 103.8038,
      "eval_samples_per_second": 45.692,
      "eval_steps_per_second": 0.723,
      "step": 5643
    },
    {
      "epoch": 20.0,
      "eval_loss": 3.5578090319177136e-05,
      "eval_runtime": 99.0675,
      "eval_samples_per_second": 47.876,
      "eval_steps_per_second": 0.757,
      "step": 5940
    }
  ],
  "logging_steps": 500,
  "max_steps": 5940,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 20,
  "save_steps": 500,
  "total_flos": 785921954204160.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}