|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.0023502867349816676, |
|
"eval_steps": 9, |
|
"global_step": 25, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 9.401146939926671e-05, |
|
"grad_norm": 1.1071391105651855, |
|
"learning_rate": 1e-05, |
|
"loss": 3.1216, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 9.401146939926671e-05, |
|
"eval_loss": 2.95656681060791, |
|
"eval_runtime": 253.1318, |
|
"eval_samples_per_second": 35.389, |
|
"eval_steps_per_second": 4.425, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00018802293879853342, |
|
"grad_norm": 0.9681172370910645, |
|
"learning_rate": 2e-05, |
|
"loss": 2.9395, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0002820344081978001, |
|
"grad_norm": 0.9034966230392456, |
|
"learning_rate": 3e-05, |
|
"loss": 2.6474, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.00037604587759706683, |
|
"grad_norm": 1.0967257022857666, |
|
"learning_rate": 4e-05, |
|
"loss": 2.9769, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.00047005734699633354, |
|
"grad_norm": 0.9553994536399841, |
|
"learning_rate": 5e-05, |
|
"loss": 2.889, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0005640688163956002, |
|
"grad_norm": 1.050720453262329, |
|
"learning_rate": 6e-05, |
|
"loss": 2.8748, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.000658080285794867, |
|
"grad_norm": 1.073960781097412, |
|
"learning_rate": 7e-05, |
|
"loss": 2.8886, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0007520917551941337, |
|
"grad_norm": 1.1246281862258911, |
|
"learning_rate": 8e-05, |
|
"loss": 3.0083, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0008461032245934004, |
|
"grad_norm": 1.4751750230789185, |
|
"learning_rate": 9e-05, |
|
"loss": 3.011, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0008461032245934004, |
|
"eval_loss": 2.774919033050537, |
|
"eval_runtime": 252.8918, |
|
"eval_samples_per_second": 35.422, |
|
"eval_steps_per_second": 4.429, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0009401146939926671, |
|
"grad_norm": 1.148061752319336, |
|
"learning_rate": 0.0001, |
|
"loss": 2.506, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.001034126163391934, |
|
"grad_norm": 1.3042597770690918, |
|
"learning_rate": 9.99695413509548e-05, |
|
"loss": 2.517, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.0011281376327912005, |
|
"grad_norm": 1.1282496452331543, |
|
"learning_rate": 9.987820251299122e-05, |
|
"loss": 2.5106, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.0012221491021904673, |
|
"grad_norm": 1.2557356357574463, |
|
"learning_rate": 9.972609476841367e-05, |
|
"loss": 2.3613, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.001316160571589734, |
|
"grad_norm": 1.1485098600387573, |
|
"learning_rate": 9.951340343707852e-05, |
|
"loss": 2.3087, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.0014101720409890007, |
|
"grad_norm": 1.3429392576217651, |
|
"learning_rate": 9.924038765061042e-05, |
|
"loss": 2.293, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0015041835103882673, |
|
"grad_norm": 1.2680690288543701, |
|
"learning_rate": 9.890738003669029e-05, |
|
"loss": 2.0638, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.0015981949797875341, |
|
"grad_norm": 1.4249025583267212, |
|
"learning_rate": 9.851478631379982e-05, |
|
"loss": 2.19, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.0016922064491868007, |
|
"grad_norm": 1.2500929832458496, |
|
"learning_rate": 9.806308479691595e-05, |
|
"loss": 2.0502, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0016922064491868007, |
|
"eval_loss": 1.9111806154251099, |
|
"eval_runtime": 252.9419, |
|
"eval_samples_per_second": 35.415, |
|
"eval_steps_per_second": 4.428, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0017862179185860676, |
|
"grad_norm": 1.1099755764007568, |
|
"learning_rate": 9.755282581475769e-05, |
|
"loss": 1.8685, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.0018802293879853342, |
|
"grad_norm": 1.2947331666946411, |
|
"learning_rate": 9.698463103929542e-05, |
|
"loss": 1.8453, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0019742408573846008, |
|
"grad_norm": 1.0603139400482178, |
|
"learning_rate": 9.635919272833938e-05, |
|
"loss": 1.7496, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.002068252326783868, |
|
"grad_norm": 1.1734318733215332, |
|
"learning_rate": 9.567727288213005e-05, |
|
"loss": 1.6085, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.0021622637961831344, |
|
"grad_norm": 1.0922938585281372, |
|
"learning_rate": 9.493970231495835e-05, |
|
"loss": 1.5754, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.002256275265582401, |
|
"grad_norm": 1.184679627418518, |
|
"learning_rate": 9.414737964294636e-05, |
|
"loss": 1.6059, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.0023502867349816676, |
|
"grad_norm": 1.039542555809021, |
|
"learning_rate": 9.330127018922194e-05, |
|
"loss": 1.3529, |
|
"step": 25 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 25, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2573421930086400.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|