|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.02040816326530612, |
|
"eval_steps": 10, |
|
"global_step": 50, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00040816326530612246, |
|
"eval_loss": 1.038622498512268, |
|
"eval_runtime": 100.2598, |
|
"eval_samples_per_second": 10.293, |
|
"eval_steps_per_second": 5.147, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0020408163265306124, |
|
"grad_norm": 1.570367455482483, |
|
"learning_rate": 5e-05, |
|
"loss": 4.0227, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.004081632653061225, |
|
"grad_norm": 1.5186132192611694, |
|
"learning_rate": 0.0001, |
|
"loss": 3.7081, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.004081632653061225, |
|
"eval_loss": 0.9728806614875793, |
|
"eval_runtime": 100.715, |
|
"eval_samples_per_second": 10.247, |
|
"eval_steps_per_second": 5.123, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.006122448979591836, |
|
"grad_norm": 1.417625069618225, |
|
"learning_rate": 9.619397662556435e-05, |
|
"loss": 3.859, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.00816326530612245, |
|
"grad_norm": 2.1316819190979004, |
|
"learning_rate": 8.535533905932738e-05, |
|
"loss": 3.3143, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.00816326530612245, |
|
"eval_loss": 0.8414375185966492, |
|
"eval_runtime": 100.4603, |
|
"eval_samples_per_second": 10.273, |
|
"eval_steps_per_second": 5.136, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01020408163265306, |
|
"grad_norm": 1.9280800819396973, |
|
"learning_rate": 6.91341716182545e-05, |
|
"loss": 3.1387, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.012244897959183673, |
|
"grad_norm": 2.512467861175537, |
|
"learning_rate": 5e-05, |
|
"loss": 3.2341, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.012244897959183673, |
|
"eval_loss": 0.763218879699707, |
|
"eval_runtime": 100.7949, |
|
"eval_samples_per_second": 10.239, |
|
"eval_steps_per_second": 5.119, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.014285714285714285, |
|
"grad_norm": 2.0444631576538086, |
|
"learning_rate": 3.086582838174551e-05, |
|
"loss": 3.1349, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.0163265306122449, |
|
"grad_norm": 2.757094621658325, |
|
"learning_rate": 1.4644660940672627e-05, |
|
"loss": 2.8461, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0163265306122449, |
|
"eval_loss": 0.7422359585762024, |
|
"eval_runtime": 100.6434, |
|
"eval_samples_per_second": 10.254, |
|
"eval_steps_per_second": 5.127, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.018367346938775512, |
|
"grad_norm": 2.56603741645813, |
|
"learning_rate": 3.8060233744356633e-06, |
|
"loss": 2.8422, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.02040816326530612, |
|
"grad_norm": 2.2543857097625732, |
|
"learning_rate": 0.0, |
|
"loss": 2.915, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02040816326530612, |
|
"eval_loss": 0.7383472323417664, |
|
"eval_runtime": 100.892, |
|
"eval_samples_per_second": 10.229, |
|
"eval_steps_per_second": 5.114, |
|
"step": 50 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 50, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 13, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.74734328201216e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|