|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.027496133356246778, |
|
"eval_steps": 5, |
|
"global_step": 20, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.001374806667812339, |
|
"grad_norm": 0.42937517166137695, |
|
"learning_rate": 1e-05, |
|
"loss": 1.7403, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.001374806667812339, |
|
"eval_loss": 1.7371172904968262, |
|
"eval_runtime": 76.9114, |
|
"eval_samples_per_second": 15.94, |
|
"eval_steps_per_second": 7.97, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.002749613335624678, |
|
"grad_norm": 0.45928120613098145, |
|
"learning_rate": 2e-05, |
|
"loss": 1.702, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0041244200034370165, |
|
"grad_norm": 0.3917050063610077, |
|
"learning_rate": 3e-05, |
|
"loss": 1.6859, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.005499226671249356, |
|
"grad_norm": 0.4303048849105835, |
|
"learning_rate": 4e-05, |
|
"loss": 1.7193, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.006874033339061694, |
|
"grad_norm": 0.4062155485153198, |
|
"learning_rate": 5e-05, |
|
"loss": 1.6722, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.006874033339061694, |
|
"eval_loss": 1.672951102256775, |
|
"eval_runtime": 28.0469, |
|
"eval_samples_per_second": 43.712, |
|
"eval_steps_per_second": 21.856, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.008248840006874033, |
|
"grad_norm": 0.4282960295677185, |
|
"learning_rate": 6e-05, |
|
"loss": 1.6042, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.009623646674686372, |
|
"grad_norm": 0.4546167552471161, |
|
"learning_rate": 7e-05, |
|
"loss": 1.6189, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.010998453342498712, |
|
"grad_norm": 0.48203930258750916, |
|
"learning_rate": 8e-05, |
|
"loss": 1.6342, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.01237326001031105, |
|
"grad_norm": 0.5538018941879272, |
|
"learning_rate": 9e-05, |
|
"loss": 1.6007, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.013748066678123389, |
|
"grad_norm": 0.511499285697937, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5262, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.013748066678123389, |
|
"eval_loss": 1.472701072692871, |
|
"eval_runtime": 58.4822, |
|
"eval_samples_per_second": 20.964, |
|
"eval_steps_per_second": 10.482, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.015122873345935728, |
|
"grad_norm": 0.5674175024032593, |
|
"learning_rate": 9.755282581475769e-05, |
|
"loss": 1.4697, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.016497680013748066, |
|
"grad_norm": 0.5313400030136108, |
|
"learning_rate": 9.045084971874738e-05, |
|
"loss": 1.3832, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.017872486681560405, |
|
"grad_norm": 0.5378307700157166, |
|
"learning_rate": 7.938926261462366e-05, |
|
"loss": 1.3486, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.019247293349372745, |
|
"grad_norm": 0.48813918232917786, |
|
"learning_rate": 6.545084971874738e-05, |
|
"loss": 1.2926, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.020622100017185084, |
|
"grad_norm": 0.4850353002548218, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2852, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.020622100017185084, |
|
"eval_loss": 1.2362544536590576, |
|
"eval_runtime": 59.0914, |
|
"eval_samples_per_second": 20.748, |
|
"eval_steps_per_second": 10.374, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.021996906684997423, |
|
"grad_norm": 0.46873438358306885, |
|
"learning_rate": 3.4549150281252636e-05, |
|
"loss": 1.2155, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.02337171335280976, |
|
"grad_norm": 0.42953404784202576, |
|
"learning_rate": 2.061073738537635e-05, |
|
"loss": 1.1754, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.0247465200206221, |
|
"grad_norm": 0.41407155990600586, |
|
"learning_rate": 9.549150281252633e-06, |
|
"loss": 1.2063, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.026121326688434438, |
|
"grad_norm": 0.42715543508529663, |
|
"learning_rate": 2.4471741852423237e-06, |
|
"loss": 1.1701, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.027496133356246778, |
|
"grad_norm": 0.4192153811454773, |
|
"learning_rate": 0.0, |
|
"loss": 1.1276, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.027496133356246778, |
|
"eval_loss": 1.1842869520187378, |
|
"eval_runtime": 61.791, |
|
"eval_samples_per_second": 19.841, |
|
"eval_steps_per_second": 9.921, |
|
"step": 20 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 20, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 5, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.302986508730368e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|