|
{ |
|
"best_metric": 2.360255479812622, |
|
"best_model_checkpoint": "./outputs/checkpoint-1100", |
|
"epoch": 0.8014571948998178, |
|
"eval_steps": 100, |
|
"global_step": 1100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002, |
|
"loss": 2.7399, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 2.6418099403381348, |
|
"eval_runtime": 347.6157, |
|
"eval_samples_per_second": 18.049, |
|
"eval_steps_per_second": 2.258, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0002, |
|
"loss": 2.6052, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 2.5918312072753906, |
|
"eval_runtime": 333.731, |
|
"eval_samples_per_second": 18.8, |
|
"eval_steps_per_second": 2.352, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002, |
|
"loss": 2.5622, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 2.551574468612671, |
|
"eval_runtime": 204.9306, |
|
"eval_samples_per_second": 30.615, |
|
"eval_steps_per_second": 3.831, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0002, |
|
"loss": 2.5366, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 2.517575263977051, |
|
"eval_runtime": 204.3925, |
|
"eval_samples_per_second": 30.696, |
|
"eval_steps_per_second": 3.841, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0002, |
|
"loss": 2.4946, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 2.4924821853637695, |
|
"eval_runtime": 204.4035, |
|
"eval_samples_per_second": 30.694, |
|
"eval_steps_per_second": 3.84, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0002, |
|
"loss": 2.4686, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 2.4666266441345215, |
|
"eval_runtime": 207.3453, |
|
"eval_samples_per_second": 30.259, |
|
"eval_steps_per_second": 3.786, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0002, |
|
"loss": 2.4503, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 2.4440107345581055, |
|
"eval_runtime": 205.5485, |
|
"eval_samples_per_second": 30.523, |
|
"eval_steps_per_second": 3.819, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0002, |
|
"loss": 2.4271, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 2.4231719970703125, |
|
"eval_runtime": 204.3763, |
|
"eval_samples_per_second": 30.698, |
|
"eval_steps_per_second": 3.841, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0002, |
|
"loss": 2.3911, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 2.401764154434204, |
|
"eval_runtime": 204.7042, |
|
"eval_samples_per_second": 30.649, |
|
"eval_steps_per_second": 3.835, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.0002, |
|
"loss": 2.3881, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 2.3822004795074463, |
|
"eval_runtime": 204.6048, |
|
"eval_samples_per_second": 30.664, |
|
"eval_steps_per_second": 3.837, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0002, |
|
"loss": 2.3811, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 2.360255479812622, |
|
"eval_runtime": 283.1616, |
|
"eval_samples_per_second": 22.157, |
|
"eval_steps_per_second": 2.772, |
|
"step": 1100 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 4116, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"total_flos": 3.208363367473152e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|