|
{ |
|
"best_metric": 1.929431438446045, |
|
"best_model_checkpoint": "./outputs/checkpoint-1200", |
|
"epoch": 1.7045454545454546, |
|
"eval_steps": 100, |
|
"global_step": 1200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0002, |
|
"loss": 2.4609, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 2.123192071914673, |
|
"eval_runtime": 56.1237, |
|
"eval_samples_per_second": 26.584, |
|
"eval_steps_per_second": 3.332, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0002, |
|
"loss": 2.2635, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 2.083603858947754, |
|
"eval_runtime": 53.713, |
|
"eval_samples_per_second": 27.777, |
|
"eval_steps_per_second": 3.481, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0002, |
|
"loss": 2.2344, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 2.054030656814575, |
|
"eval_runtime": 53.5493, |
|
"eval_samples_per_second": 27.862, |
|
"eval_steps_per_second": 3.492, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0002, |
|
"loss": 2.2009, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 2.038727045059204, |
|
"eval_runtime": 53.5913, |
|
"eval_samples_per_second": 27.84, |
|
"eval_steps_per_second": 3.489, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1811, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 2.0150375366210938, |
|
"eval_runtime": 53.7081, |
|
"eval_samples_per_second": 27.78, |
|
"eval_steps_per_second": 3.482, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1648, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_loss": 1.9949842691421509, |
|
"eval_runtime": 53.6059, |
|
"eval_samples_per_second": 27.833, |
|
"eval_steps_per_second": 3.488, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1446, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_loss": 1.9850085973739624, |
|
"eval_runtime": 53.5892, |
|
"eval_samples_per_second": 27.841, |
|
"eval_steps_per_second": 3.49, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1122, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_loss": 1.9744948148727417, |
|
"eval_runtime": 53.6175, |
|
"eval_samples_per_second": 27.827, |
|
"eval_steps_per_second": 3.488, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.0002, |
|
"loss": 2.0852, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_loss": 1.9583721160888672, |
|
"eval_runtime": 53.7484, |
|
"eval_samples_per_second": 27.759, |
|
"eval_steps_per_second": 3.479, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.0002, |
|
"loss": 2.0848, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"eval_loss": 1.9483006000518799, |
|
"eval_runtime": 53.6139, |
|
"eval_samples_per_second": 27.829, |
|
"eval_steps_per_second": 3.488, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.0002, |
|
"loss": 2.091, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_loss": 1.936788558959961, |
|
"eval_runtime": 53.6874, |
|
"eval_samples_per_second": 27.791, |
|
"eval_steps_per_second": 3.483, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.0002, |
|
"loss": 2.0684, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_loss": 1.929431438446045, |
|
"eval_runtime": 53.7712, |
|
"eval_samples_per_second": 27.747, |
|
"eval_steps_per_second": 3.478, |
|
"step": 1200 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 2112, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"total_flos": 3.634363552684032e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|