{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.06105006105006105,
  "eval_steps": 10,
  "global_step": 50,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.001221001221001221,
      "eval_loss": 11.934243202209473,
      "eval_runtime": 5.9922,
      "eval_samples_per_second": 57.575,
      "eval_steps_per_second": 28.871,
      "step": 1
    },
    {
      "epoch": 0.006105006105006105,
      "grad_norm": 0.010467967949807644,
      "learning_rate": 5e-05,
      "loss": 11.9338,
      "step": 5
    },
    {
      "epoch": 0.01221001221001221,
      "grad_norm": 0.012429132126271725,
      "learning_rate": 0.0001,
      "loss": 11.934,
      "step": 10
    },
    {
      "epoch": 0.01221001221001221,
      "eval_loss": 11.934219360351562,
      "eval_runtime": 6.086,
      "eval_samples_per_second": 56.688,
      "eval_steps_per_second": 28.426,
      "step": 10
    },
    {
      "epoch": 0.018315018315018316,
      "grad_norm": 0.010163530707359314,
      "learning_rate": 9.619397662556435e-05,
      "loss": 11.9346,
      "step": 15
    },
    {
      "epoch": 0.02442002442002442,
      "grad_norm": 0.010413922369480133,
      "learning_rate": 8.535533905932738e-05,
      "loss": 11.9359,
      "step": 20
    },
    {
      "epoch": 0.02442002442002442,
      "eval_loss": 11.934165954589844,
      "eval_runtime": 5.9793,
      "eval_samples_per_second": 57.699,
      "eval_steps_per_second": 28.933,
      "step": 20
    },
    {
      "epoch": 0.030525030525030524,
      "grad_norm": 0.010765035636723042,
      "learning_rate": 6.91341716182545e-05,
      "loss": 11.933,
      "step": 25
    },
    {
      "epoch": 0.03663003663003663,
      "grad_norm": 0.017171213403344154,
      "learning_rate": 5e-05,
      "loss": 11.9326,
      "step": 30
    },
    {
      "epoch": 0.03663003663003663,
      "eval_loss": 11.934120178222656,
      "eval_runtime": 6.1064,
      "eval_samples_per_second": 56.498,
      "eval_steps_per_second": 28.331,
      "step": 30
    },
    {
      "epoch": 0.042735042735042736,
      "grad_norm": 0.009910144843161106,
      "learning_rate": 3.086582838174551e-05,
      "loss": 11.9316,
      "step": 35
    },
    {
      "epoch": 0.04884004884004884,
      "grad_norm": 0.014119105413556099,
      "learning_rate": 1.4644660940672627e-05,
      "loss": 11.9326,
      "step": 40
    },
    {
      "epoch": 0.04884004884004884,
      "eval_loss": 11.934100151062012,
      "eval_runtime": 6.0973,
      "eval_samples_per_second": 56.582,
      "eval_steps_per_second": 28.373,
      "step": 40
    },
    {
      "epoch": 0.054945054945054944,
      "grad_norm": 0.00930514745414257,
      "learning_rate": 3.8060233744356633e-06,
      "loss": 11.9322,
      "step": 45
    },
    {
      "epoch": 0.06105006105006105,
      "grad_norm": 0.010311778634786606,
      "learning_rate": 0.0,
      "loss": 11.9343,
      "step": 50
    },
    {
      "epoch": 0.06105006105006105,
      "eval_loss": 11.934097290039062,
      "eval_runtime": 6.0342,
      "eval_samples_per_second": 57.174,
      "eval_steps_per_second": 28.67,
      "step": 50
    }
  ],
  "logging_steps": 5,
  "max_steps": 50,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 13,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5908070400.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}