{
  "best_metric": 1.3063520193099976,
  "best_model_checkpoint": "./outputs/checkpoint-1800",
  "epoch": 1.3114754098360657,
  "eval_steps": 100,
  "global_step": 1800,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.07,
      "learning_rate": 0.0002,
      "loss": 1.765,
      "step": 100
    },
    {
      "epoch": 0.07,
      "eval_loss": 1.631608486175537,
      "eval_runtime": 430.1354,
      "eval_samples_per_second": 14.586,
      "eval_steps_per_second": 1.825,
      "step": 100
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.0002,
      "loss": 1.6077,
      "step": 200
    },
    {
      "epoch": 0.15,
      "eval_loss": 1.5870035886764526,
      "eval_runtime": 417.8578,
      "eval_samples_per_second": 15.015,
      "eval_steps_per_second": 1.879,
      "step": 200
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.0002,
      "loss": 1.5755,
      "step": 300
    },
    {
      "epoch": 0.22,
      "eval_loss": 1.5563029050827026,
      "eval_runtime": 417.8787,
      "eval_samples_per_second": 15.014,
      "eval_steps_per_second": 1.879,
      "step": 300
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.0002,
      "loss": 1.5445,
      "step": 400
    },
    {
      "epoch": 0.29,
      "eval_loss": 1.5300686359405518,
      "eval_runtime": 417.9846,
      "eval_samples_per_second": 15.01,
      "eval_steps_per_second": 1.878,
      "step": 400
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.0002,
      "loss": 1.5114,
      "step": 500
    },
    {
      "epoch": 0.36,
      "eval_loss": 1.5074832439422607,
      "eval_runtime": 417.962,
      "eval_samples_per_second": 15.011,
      "eval_steps_per_second": 1.878,
      "step": 500
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.0002,
      "loss": 1.4946,
      "step": 600
    },
    {
      "epoch": 0.44,
      "eval_loss": 1.4870833158493042,
      "eval_runtime": 417.6568,
      "eval_samples_per_second": 15.022,
      "eval_steps_per_second": 1.88,
      "step": 600
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.0002,
      "loss": 1.472,
      "step": 700
    },
    {
      "epoch": 0.51,
      "eval_loss": 1.4683642387390137,
      "eval_runtime": 417.7114,
      "eval_samples_per_second": 15.02,
      "eval_steps_per_second": 1.879,
      "step": 700
    },
    {
      "epoch": 0.58,
      "learning_rate": 0.0002,
      "loss": 1.4622,
      "step": 800
    },
    {
      "epoch": 0.58,
      "eval_loss": 1.4501450061798096,
      "eval_runtime": 417.7046,
      "eval_samples_per_second": 15.02,
      "eval_steps_per_second": 1.879,
      "step": 800
    },
    {
      "epoch": 0.66,
      "learning_rate": 0.0002,
      "loss": 1.433,
      "step": 900
    },
    {
      "epoch": 0.66,
      "eval_loss": 1.4323909282684326,
      "eval_runtime": 417.8696,
      "eval_samples_per_second": 15.014,
      "eval_steps_per_second": 1.879,
      "step": 900
    },
    {
      "epoch": 0.73,
      "learning_rate": 0.0002,
      "loss": 1.4268,
      "step": 1000
    },
    {
      "epoch": 0.73,
      "eval_loss": 1.4162870645523071,
      "eval_runtime": 417.7667,
      "eval_samples_per_second": 15.018,
      "eval_steps_per_second": 1.879,
      "step": 1000
    },
    {
      "epoch": 0.8,
      "learning_rate": 0.0002,
      "loss": 1.4125,
      "step": 1100
    },
    {
      "epoch": 0.8,
      "eval_loss": 1.4018020629882812,
      "eval_runtime": 417.5926,
      "eval_samples_per_second": 15.024,
      "eval_steps_per_second": 1.88,
      "step": 1100
    },
    {
      "epoch": 0.87,
      "learning_rate": 0.0002,
      "loss": 1.3846,
      "step": 1200
    },
    {
      "epoch": 0.87,
      "eval_loss": 1.3857518434524536,
      "eval_runtime": 417.6267,
      "eval_samples_per_second": 15.023,
      "eval_steps_per_second": 1.88,
      "step": 1200
    },
    {
      "epoch": 0.95,
      "learning_rate": 0.0002,
      "loss": 1.3933,
      "step": 1300
    },
    {
      "epoch": 0.95,
      "eval_loss": 1.3718016147613525,
      "eval_runtime": 417.2876,
      "eval_samples_per_second": 15.035,
      "eval_steps_per_second": 1.881,
      "step": 1300
    },
    {
      "epoch": 1.02,
      "learning_rate": 0.0002,
      "loss": 1.3556,
      "step": 1400
    },
    {
      "epoch": 1.02,
      "eval_loss": 1.3577570915222168,
      "eval_runtime": 417.5078,
      "eval_samples_per_second": 15.027,
      "eval_steps_per_second": 1.88,
      "step": 1400
    },
    {
      "epoch": 1.09,
      "learning_rate": 0.0002,
      "loss": 1.3316,
      "step": 1500
    },
    {
      "epoch": 1.09,
      "eval_loss": 1.3446884155273438,
      "eval_runtime": 417.3899,
      "eval_samples_per_second": 15.032,
      "eval_steps_per_second": 1.881,
      "step": 1500
    },
    {
      "epoch": 1.17,
      "learning_rate": 0.0002,
      "loss": 1.3207,
      "step": 1600
    },
    {
      "epoch": 1.17,
      "eval_loss": 1.3314917087554932,
      "eval_runtime": 417.7717,
      "eval_samples_per_second": 15.018,
      "eval_steps_per_second": 1.879,
      "step": 1600
    },
    {
      "epoch": 1.24,
      "learning_rate": 0.0002,
      "loss": 1.3212,
      "step": 1700
    },
    {
      "epoch": 1.24,
      "eval_loss": 1.318053960800171,
      "eval_runtime": 417.56,
      "eval_samples_per_second": 15.025,
      "eval_steps_per_second": 1.88,
      "step": 1700
    },
    {
      "epoch": 1.31,
      "learning_rate": 0.0002,
      "loss": 1.2967,
      "step": 1800
    },
    {
      "epoch": 1.31,
      "eval_loss": 1.3063520193099976,
      "eval_runtime": 417.5913,
      "eval_samples_per_second": 15.024,
      "eval_steps_per_second": 1.88,
      "step": 1800
    }
  ],
  "logging_steps": 100,
  "max_steps": 4116,
  "num_train_epochs": 3,
  "save_steps": 100,
  "total_flos": 5.83614412193151e+17,
  "trial_name": null,
  "trial_params": null
}