|
{ |
|
"best_metric": 1.8635917901992798, |
|
"best_model_checkpoint": "./outputs/checkpoint-2100", |
|
"epoch": 2.9829545454545454, |
|
"eval_steps": 100, |
|
"global_step": 2100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0002, |
|
"loss": 2.4609, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 2.123192071914673, |
|
"eval_runtime": 56.1237, |
|
"eval_samples_per_second": 26.584, |
|
"eval_steps_per_second": 3.332, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0002, |
|
"loss": 2.2635, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 2.083603858947754, |
|
"eval_runtime": 53.713, |
|
"eval_samples_per_second": 27.777, |
|
"eval_steps_per_second": 3.481, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0002, |
|
"loss": 2.2344, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 2.054030656814575, |
|
"eval_runtime": 53.5493, |
|
"eval_samples_per_second": 27.862, |
|
"eval_steps_per_second": 3.492, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0002, |
|
"loss": 2.2009, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 2.038727045059204, |
|
"eval_runtime": 53.5913, |
|
"eval_samples_per_second": 27.84, |
|
"eval_steps_per_second": 3.489, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1811, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 2.0150375366210938, |
|
"eval_runtime": 53.7081, |
|
"eval_samples_per_second": 27.78, |
|
"eval_steps_per_second": 3.482, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1648, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_loss": 1.9949842691421509, |
|
"eval_runtime": 53.6059, |
|
"eval_samples_per_second": 27.833, |
|
"eval_steps_per_second": 3.488, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1446, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_loss": 1.9850085973739624, |
|
"eval_runtime": 53.5892, |
|
"eval_samples_per_second": 27.841, |
|
"eval_steps_per_second": 3.49, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1122, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_loss": 1.9744948148727417, |
|
"eval_runtime": 53.6175, |
|
"eval_samples_per_second": 27.827, |
|
"eval_steps_per_second": 3.488, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.0002, |
|
"loss": 2.0852, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_loss": 1.9583721160888672, |
|
"eval_runtime": 53.7484, |
|
"eval_samples_per_second": 27.759, |
|
"eval_steps_per_second": 3.479, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.0002, |
|
"loss": 2.0848, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"eval_loss": 1.9483006000518799, |
|
"eval_runtime": 53.6139, |
|
"eval_samples_per_second": 27.829, |
|
"eval_steps_per_second": 3.488, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.0002, |
|
"loss": 2.091, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_loss": 1.936788558959961, |
|
"eval_runtime": 53.6874, |
|
"eval_samples_per_second": 27.791, |
|
"eval_steps_per_second": 3.483, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.0002, |
|
"loss": 2.0684, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_loss": 1.929431438446045, |
|
"eval_runtime": 53.7712, |
|
"eval_samples_per_second": 27.747, |
|
"eval_steps_per_second": 3.478, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.0002, |
|
"loss": 2.0524, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"eval_loss": 1.9179173707962036, |
|
"eval_runtime": 53.6365, |
|
"eval_samples_per_second": 27.817, |
|
"eval_steps_per_second": 3.486, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.0002, |
|
"loss": 2.0562, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_loss": 1.9093526601791382, |
|
"eval_runtime": 53.6029, |
|
"eval_samples_per_second": 27.834, |
|
"eval_steps_per_second": 3.489, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 0.0002, |
|
"loss": 2.0075, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"eval_loss": 1.9054597616195679, |
|
"eval_runtime": 53.6616, |
|
"eval_samples_per_second": 27.804, |
|
"eval_steps_per_second": 3.485, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 0.0002, |
|
"loss": 2.0119, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"eval_loss": 1.8929505348205566, |
|
"eval_runtime": 53.7348, |
|
"eval_samples_per_second": 27.766, |
|
"eval_steps_per_second": 3.48, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 0.0002, |
|
"loss": 1.9964, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"eval_loss": 1.8908874988555908, |
|
"eval_runtime": 53.6745, |
|
"eval_samples_per_second": 27.797, |
|
"eval_steps_per_second": 3.484, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 0.0002, |
|
"loss": 1.9869, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"eval_loss": 1.8806322813034058, |
|
"eval_runtime": 53.6914, |
|
"eval_samples_per_second": 27.788, |
|
"eval_steps_per_second": 3.483, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 0.0002, |
|
"loss": 2.0004, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"eval_loss": 1.877378225326538, |
|
"eval_runtime": 53.6054, |
|
"eval_samples_per_second": 27.833, |
|
"eval_steps_per_second": 3.488, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 0.0002, |
|
"loss": 1.991, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"eval_loss": 1.8717212677001953, |
|
"eval_runtime": 53.7679, |
|
"eval_samples_per_second": 27.749, |
|
"eval_steps_per_second": 3.478, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 0.0002, |
|
"loss": 1.9898, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"eval_loss": 1.8635917901992798, |
|
"eval_runtime": 53.6308, |
|
"eval_samples_per_second": 27.82, |
|
"eval_steps_per_second": 3.487, |
|
"step": 2100 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 2112, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"total_flos": 6.361597443704832e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|