|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.0017287578874578616, |
|
"eval_steps": 3, |
|
"global_step": 10, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00017287578874578615, |
|
"grad_norm": 3.553006649017334, |
|
"learning_rate": 2e-05, |
|
"loss": 10.147, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00017287578874578615, |
|
"eval_loss": 10.197986602783203, |
|
"eval_runtime": 100.6946, |
|
"eval_samples_per_second": 24.192, |
|
"eval_steps_per_second": 12.096, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0003457515774915723, |
|
"grad_norm": 3.655125141143799, |
|
"learning_rate": 4e-05, |
|
"loss": 9.9026, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0005186273662373585, |
|
"grad_norm": 3.4464893341064453, |
|
"learning_rate": 6e-05, |
|
"loss": 9.9519, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0005186273662373585, |
|
"eval_loss": 10.192020416259766, |
|
"eval_runtime": 100.4666, |
|
"eval_samples_per_second": 24.247, |
|
"eval_steps_per_second": 12.123, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0006915031549831446, |
|
"grad_norm": 3.5533785820007324, |
|
"learning_rate": 8e-05, |
|
"loss": 10.1142, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0008643789437289308, |
|
"grad_norm": 3.568767786026001, |
|
"learning_rate": 0.0001, |
|
"loss": 10.1969, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.001037254732474717, |
|
"grad_norm": 3.5537147521972656, |
|
"learning_rate": 0.00012, |
|
"loss": 10.2579, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.001037254732474717, |
|
"eval_loss": 10.108320236206055, |
|
"eval_runtime": 105.2129, |
|
"eval_samples_per_second": 23.153, |
|
"eval_steps_per_second": 11.577, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.001210130521220503, |
|
"grad_norm": 3.7763233184814453, |
|
"learning_rate": 0.00014, |
|
"loss": 10.4266, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0013830063099662892, |
|
"grad_norm": 3.3314616680145264, |
|
"learning_rate": 0.00016, |
|
"loss": 10.0965, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0015558820987120754, |
|
"grad_norm": 3.2022037506103516, |
|
"learning_rate": 0.00018, |
|
"loss": 10.0676, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0015558820987120754, |
|
"eval_loss": 9.950599670410156, |
|
"eval_runtime": 106.7503, |
|
"eval_samples_per_second": 22.82, |
|
"eval_steps_per_second": 11.41, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0017287578874578616, |
|
"grad_norm": 3.962679147720337, |
|
"learning_rate": 0.0002, |
|
"loss": 9.874, |
|
"step": 10 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 10, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 3, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5429904015360.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|