|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.008376963350785341, |
|
"eval_steps": 13, |
|
"global_step": 13, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0006443817962142569, |
|
"grad_norm": 4.898526668548584, |
|
"learning_rate": 1e-05, |
|
"loss": 8.2457, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0006443817962142569, |
|
"eval_loss": 7.802831649780273, |
|
"eval_runtime": 47.5305, |
|
"eval_samples_per_second": 54.996, |
|
"eval_steps_per_second": 27.498, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0012887635924285139, |
|
"grad_norm": 3.692995548248291, |
|
"learning_rate": 2e-05, |
|
"loss": 8.3769, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0019331453886427708, |
|
"grad_norm": 4.796352386474609, |
|
"learning_rate": 3e-05, |
|
"loss": 8.6187, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0025775271848570277, |
|
"grad_norm": 3.221654176712036, |
|
"learning_rate": 4e-05, |
|
"loss": 8.0322, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.003221908981071285, |
|
"grad_norm": 2.433626174926758, |
|
"learning_rate": 5e-05, |
|
"loss": 7.9877, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0038662907772855416, |
|
"grad_norm": 2.0175940990448, |
|
"learning_rate": 6e-05, |
|
"loss": 7.9417, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.004510672573499798, |
|
"grad_norm": 2.2974374294281006, |
|
"learning_rate": 7e-05, |
|
"loss": 8.3393, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0051550543697140554, |
|
"grad_norm": 1.9051103591918945, |
|
"learning_rate": 8e-05, |
|
"loss": 7.4193, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.005799436165928313, |
|
"grad_norm": 2.320819616317749, |
|
"learning_rate": 9e-05, |
|
"loss": 8.0907, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.00644381796214257, |
|
"grad_norm": 1.9515935182571411, |
|
"learning_rate": 0.0001, |
|
"loss": 7.6772, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.007088199758356826, |
|
"grad_norm": 1.723854899406433, |
|
"learning_rate": 9.98458666866564e-05, |
|
"loss": 7.4406, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.007732581554571083, |
|
"grad_norm": 3.2257795333862305, |
|
"learning_rate": 9.938441702975689e-05, |
|
"loss": 7.8407, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.008376963350785341, |
|
"grad_norm": 2.5419061183929443, |
|
"learning_rate": 9.861849601988383e-05, |
|
"loss": 8.0091, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.008376963350785341, |
|
"eval_loss": 7.61268949508667, |
|
"eval_runtime": 23.5679, |
|
"eval_samples_per_second": 110.914, |
|
"eval_steps_per_second": 55.457, |
|
"step": 13 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 50, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 13, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 28235500879872.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|