|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.07267441860465117, |
|
"eval_steps": 9, |
|
"global_step": 25, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0029069767441860465, |
|
"grad_norm": 0.6269262433052063, |
|
"learning_rate": 1e-05, |
|
"loss": 0.4149, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0029069767441860465, |
|
"eval_loss": 0.32177430391311646, |
|
"eval_runtime": 32.1501, |
|
"eval_samples_per_second": 9.02, |
|
"eval_steps_per_second": 1.151, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.005813953488372093, |
|
"grad_norm": 0.40142378211021423, |
|
"learning_rate": 2e-05, |
|
"loss": 0.2732, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.00872093023255814, |
|
"grad_norm": 0.3893200755119324, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2537, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.011627906976744186, |
|
"grad_norm": 0.44946348667144775, |
|
"learning_rate": 4e-05, |
|
"loss": 0.2946, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.014534883720930232, |
|
"grad_norm": 0.46690016984939575, |
|
"learning_rate": 5e-05, |
|
"loss": 0.2881, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01744186046511628, |
|
"grad_norm": 0.4786141812801361, |
|
"learning_rate": 6e-05, |
|
"loss": 0.2854, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.020348837209302327, |
|
"grad_norm": 0.637237548828125, |
|
"learning_rate": 7e-05, |
|
"loss": 0.3118, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.023255813953488372, |
|
"grad_norm": 0.35781624913215637, |
|
"learning_rate": 8e-05, |
|
"loss": 0.2149, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.02616279069767442, |
|
"grad_norm": 0.4675982594490051, |
|
"learning_rate": 9e-05, |
|
"loss": 0.1809, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.02616279069767442, |
|
"eval_loss": 0.18669699132442474, |
|
"eval_runtime": 31.781, |
|
"eval_samples_per_second": 9.125, |
|
"eval_steps_per_second": 1.164, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.029069767441860465, |
|
"grad_norm": 1.1234859228134155, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1509, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03197674418604651, |
|
"grad_norm": 0.597476601600647, |
|
"learning_rate": 9.99695413509548e-05, |
|
"loss": 0.1963, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.03488372093023256, |
|
"grad_norm": 0.45688414573669434, |
|
"learning_rate": 9.987820251299122e-05, |
|
"loss": 0.0975, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.0377906976744186, |
|
"grad_norm": 0.43219056725502014, |
|
"learning_rate": 9.972609476841367e-05, |
|
"loss": 0.0719, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.040697674418604654, |
|
"grad_norm": 0.5428093075752258, |
|
"learning_rate": 9.951340343707852e-05, |
|
"loss": 0.1036, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.0436046511627907, |
|
"grad_norm": 0.7716228365898132, |
|
"learning_rate": 9.924038765061042e-05, |
|
"loss": 0.081, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.046511627906976744, |
|
"grad_norm": 0.8211978673934937, |
|
"learning_rate": 9.890738003669029e-05, |
|
"loss": 0.1221, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.04941860465116279, |
|
"grad_norm": 0.9486254453659058, |
|
"learning_rate": 9.851478631379982e-05, |
|
"loss": 0.1489, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.05232558139534884, |
|
"grad_norm": 0.5020085573196411, |
|
"learning_rate": 9.806308479691595e-05, |
|
"loss": 0.1001, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.05232558139534884, |
|
"eval_loss": 0.06299833208322525, |
|
"eval_runtime": 31.8162, |
|
"eval_samples_per_second": 9.115, |
|
"eval_steps_per_second": 1.163, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.055232558139534885, |
|
"grad_norm": 0.3962463140487671, |
|
"learning_rate": 9.755282581475769e-05, |
|
"loss": 0.0522, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.05813953488372093, |
|
"grad_norm": 0.774312436580658, |
|
"learning_rate": 9.698463103929542e-05, |
|
"loss": 0.1395, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.061046511627906974, |
|
"grad_norm": 0.4788380265235901, |
|
"learning_rate": 9.635919272833938e-05, |
|
"loss": 0.0497, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.06395348837209303, |
|
"grad_norm": 0.4114557206630707, |
|
"learning_rate": 9.567727288213005e-05, |
|
"loss": 0.0671, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.06686046511627906, |
|
"grad_norm": 0.18382249772548676, |
|
"learning_rate": 9.493970231495835e-05, |
|
"loss": 0.0168, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.06976744186046512, |
|
"grad_norm": 0.30654194951057434, |
|
"learning_rate": 9.414737964294636e-05, |
|
"loss": 0.0493, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.07267441860465117, |
|
"grad_norm": 0.5613061189651489, |
|
"learning_rate": 9.330127018922194e-05, |
|
"loss": 0.0914, |
|
"step": 25 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 25, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.8918125730791424e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|