|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 27.149321266968325, |
|
"eval_steps": 1000, |
|
"global_step": 6000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 1.9201030731201172, |
|
"eval_runtime": 10.5666, |
|
"eval_samples_per_second": 20.915, |
|
"eval_steps_per_second": 5.3, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 1.8042283058166504, |
|
"eval_runtime": 10.5219, |
|
"eval_samples_per_second": 21.004, |
|
"eval_steps_per_second": 5.322, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 4.622926093514329e-05, |
|
"loss": 2.1534, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 1.7254605293273926, |
|
"eval_runtime": 10.5492, |
|
"eval_samples_per_second": 20.95, |
|
"eval_steps_per_second": 5.308, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 1.653130054473877, |
|
"eval_runtime": 10.5858, |
|
"eval_samples_per_second": 20.877, |
|
"eval_steps_per_second": 5.29, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 4.2458521870286574e-05, |
|
"loss": 1.8808, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 1.5984195470809937, |
|
"eval_runtime": 10.5172, |
|
"eval_samples_per_second": 21.013, |
|
"eval_steps_per_second": 5.325, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 1.5462396144866943, |
|
"eval_runtime": 10.5813, |
|
"eval_samples_per_second": 20.886, |
|
"eval_steps_per_second": 5.292, |
|
"step": 1326 |
|
}, |
|
{ |
|
"epoch": 6.79, |
|
"learning_rate": 3.868778280542987e-05, |
|
"loss": 1.7597, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 1.4970717430114746, |
|
"eval_runtime": 10.5617, |
|
"eval_samples_per_second": 20.925, |
|
"eval_steps_per_second": 5.302, |
|
"step": 1547 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 1.4608112573623657, |
|
"eval_runtime": 10.5482, |
|
"eval_samples_per_second": 20.952, |
|
"eval_steps_per_second": 5.309, |
|
"step": 1768 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 1.426717758178711, |
|
"eval_runtime": 10.5377, |
|
"eval_samples_per_second": 20.972, |
|
"eval_steps_per_second": 5.314, |
|
"step": 1989 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"learning_rate": 3.491704374057315e-05, |
|
"loss": 1.673, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 1.3964918851852417, |
|
"eval_runtime": 10.5931, |
|
"eval_samples_per_second": 20.863, |
|
"eval_steps_per_second": 5.286, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 1.3659923076629639, |
|
"eval_runtime": 10.5424, |
|
"eval_samples_per_second": 20.963, |
|
"eval_steps_per_second": 5.312, |
|
"step": 2431 |
|
}, |
|
{ |
|
"epoch": 11.31, |
|
"learning_rate": 3.114630467571644e-05, |
|
"loss": 1.6087, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 1.3398691415786743, |
|
"eval_runtime": 10.5482, |
|
"eval_samples_per_second": 20.951, |
|
"eval_steps_per_second": 5.309, |
|
"step": 2652 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 1.3123878240585327, |
|
"eval_runtime": 10.4901, |
|
"eval_samples_per_second": 21.067, |
|
"eval_steps_per_second": 5.338, |
|
"step": 2873 |
|
}, |
|
{ |
|
"epoch": 13.57, |
|
"learning_rate": 2.737556561085973e-05, |
|
"loss": 1.5523, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 1.2912379503250122, |
|
"eval_runtime": 10.5302, |
|
"eval_samples_per_second": 20.987, |
|
"eval_steps_per_second": 5.318, |
|
"step": 3094 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 1.2690876722335815, |
|
"eval_runtime": 10.5127, |
|
"eval_samples_per_second": 21.022, |
|
"eval_steps_per_second": 5.327, |
|
"step": 3315 |
|
}, |
|
{ |
|
"epoch": 15.84, |
|
"learning_rate": 2.3604826546003017e-05, |
|
"loss": 1.5072, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 1.2487969398498535, |
|
"eval_runtime": 10.5677, |
|
"eval_samples_per_second": 20.913, |
|
"eval_steps_per_second": 5.299, |
|
"step": 3536 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 1.2326879501342773, |
|
"eval_runtime": 10.5424, |
|
"eval_samples_per_second": 20.963, |
|
"eval_steps_per_second": 5.312, |
|
"step": 3757 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 1.2160953283309937, |
|
"eval_runtime": 10.5427, |
|
"eval_samples_per_second": 20.962, |
|
"eval_steps_per_second": 5.312, |
|
"step": 3978 |
|
}, |
|
{ |
|
"epoch": 18.1, |
|
"learning_rate": 1.9834087481146303e-05, |
|
"loss": 1.4711, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 1.2029471397399902, |
|
"eval_runtime": 10.5308, |
|
"eval_samples_per_second": 20.986, |
|
"eval_steps_per_second": 5.318, |
|
"step": 4199 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 1.1921287775039673, |
|
"eval_runtime": 10.5502, |
|
"eval_samples_per_second": 20.948, |
|
"eval_steps_per_second": 5.308, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 20.36, |
|
"learning_rate": 1.6063348416289596e-05, |
|
"loss": 1.4329, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_loss": 1.1807990074157715, |
|
"eval_runtime": 10.5791, |
|
"eval_samples_per_second": 20.89, |
|
"eval_steps_per_second": 5.293, |
|
"step": 4641 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_loss": 1.170788288116455, |
|
"eval_runtime": 10.5201, |
|
"eval_samples_per_second": 21.007, |
|
"eval_steps_per_second": 5.323, |
|
"step": 4862 |
|
}, |
|
{ |
|
"epoch": 22.62, |
|
"learning_rate": 1.229260935143288e-05, |
|
"loss": 1.4091, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_loss": 1.1616638898849487, |
|
"eval_runtime": 10.541, |
|
"eval_samples_per_second": 20.966, |
|
"eval_steps_per_second": 5.313, |
|
"step": 5083 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_loss": 1.1520771980285645, |
|
"eval_runtime": 10.5678, |
|
"eval_samples_per_second": 20.913, |
|
"eval_steps_per_second": 5.299, |
|
"step": 5304 |
|
}, |
|
{ |
|
"epoch": 24.89, |
|
"learning_rate": 8.52187028657617e-06, |
|
"loss": 1.392, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_loss": 1.1455986499786377, |
|
"eval_runtime": 10.5773, |
|
"eval_samples_per_second": 20.894, |
|
"eval_steps_per_second": 5.294, |
|
"step": 5525 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_loss": 1.1402668952941895, |
|
"eval_runtime": 10.5575, |
|
"eval_samples_per_second": 20.933, |
|
"eval_steps_per_second": 5.304, |
|
"step": 5746 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_loss": 1.1367387771606445, |
|
"eval_runtime": 10.5633, |
|
"eval_samples_per_second": 20.921, |
|
"eval_steps_per_second": 5.301, |
|
"step": 5967 |
|
}, |
|
{ |
|
"epoch": 27.15, |
|
"learning_rate": 4.751131221719457e-06, |
|
"loss": 1.3771, |
|
"step": 6000 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 6630, |
|
"num_train_epochs": 30, |
|
"save_steps": 1000, |
|
"total_flos": 9484103172096000.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|