|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.013343118286743613, |
|
"eval_steps": 10, |
|
"global_step": 50, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0002668623657348722, |
|
"eval_loss": 1.4171478748321533, |
|
"eval_runtime": 51.4601, |
|
"eval_samples_per_second": 30.665, |
|
"eval_steps_per_second": 15.332, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0013343118286743613, |
|
"grad_norm": 0.20870009064674377, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1958, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0026686236573487225, |
|
"grad_norm": 0.3936935365200043, |
|
"learning_rate": 0.0001, |
|
"loss": 1.4671, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0026686236573487225, |
|
"eval_loss": 1.3937021493911743, |
|
"eval_runtime": 51.4019, |
|
"eval_samples_per_second": 30.699, |
|
"eval_steps_per_second": 15.35, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.004002935486023084, |
|
"grad_norm": 0.2306661754846573, |
|
"learning_rate": 9.619397662556435e-05, |
|
"loss": 1.3999, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.005337247314697445, |
|
"grad_norm": 0.28997236490249634, |
|
"learning_rate": 8.535533905932738e-05, |
|
"loss": 1.3391, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.005337247314697445, |
|
"eval_loss": 1.331410527229309, |
|
"eval_runtime": 51.2329, |
|
"eval_samples_per_second": 30.801, |
|
"eval_steps_per_second": 15.4, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.006671559143371806, |
|
"grad_norm": 0.2963295876979828, |
|
"learning_rate": 6.91341716182545e-05, |
|
"loss": 1.3088, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.008005870972046168, |
|
"grad_norm": 0.3198167383670807, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3884, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.008005870972046168, |
|
"eval_loss": 1.3172019720077515, |
|
"eval_runtime": 51.3541, |
|
"eval_samples_per_second": 30.728, |
|
"eval_steps_per_second": 15.364, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.009340182800720529, |
|
"grad_norm": 0.271278440952301, |
|
"learning_rate": 3.086582838174551e-05, |
|
"loss": 1.2783, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.01067449462939489, |
|
"grad_norm": 0.31695592403411865, |
|
"learning_rate": 1.4644660940672627e-05, |
|
"loss": 1.2705, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01067449462939489, |
|
"eval_loss": 1.3098112344741821, |
|
"eval_runtime": 52.1649, |
|
"eval_samples_per_second": 30.25, |
|
"eval_steps_per_second": 15.125, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.012008806458069251, |
|
"grad_norm": 0.33602485060691833, |
|
"learning_rate": 3.8060233744356633e-06, |
|
"loss": 1.2586, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.013343118286743613, |
|
"grad_norm": 0.3920798599720001, |
|
"learning_rate": 0.0, |
|
"loss": 1.3313, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.013343118286743613, |
|
"eval_loss": 1.3084073066711426, |
|
"eval_runtime": 52.5608, |
|
"eval_samples_per_second": 30.022, |
|
"eval_steps_per_second": 15.011, |
|
"step": 50 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 50, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 13, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1752175051014144.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|