|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.00023108831039781854, |
|
"eval_steps": 3, |
|
"global_step": 10, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 2.3108831039781854e-05, |
|
"grad_norm": 0.3176736533641815, |
|
"learning_rate": 2e-05, |
|
"loss": 2.3064, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 2.3108831039781854e-05, |
|
"eval_loss": 2.764178991317749, |
|
"eval_runtime": 184.6756, |
|
"eval_samples_per_second": 98.665, |
|
"eval_steps_per_second": 49.335, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 4.621766207956371e-05, |
|
"grad_norm": 0.24799887835979462, |
|
"learning_rate": 4e-05, |
|
"loss": 2.6087, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 6.932649311934556e-05, |
|
"grad_norm": 0.2642222046852112, |
|
"learning_rate": 6e-05, |
|
"loss": 2.779, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 6.932649311934556e-05, |
|
"eval_loss": 2.764132022857666, |
|
"eval_runtime": 202.2714, |
|
"eval_samples_per_second": 90.082, |
|
"eval_steps_per_second": 45.043, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 9.243532415912742e-05, |
|
"grad_norm": 0.2646380662918091, |
|
"learning_rate": 8e-05, |
|
"loss": 2.3385, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.00011554415519890927, |
|
"grad_norm": 0.24436058104038239, |
|
"learning_rate": 0.0001, |
|
"loss": 2.6531, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.00013865298623869112, |
|
"grad_norm": 0.24375653266906738, |
|
"learning_rate": 0.00012, |
|
"loss": 2.5198, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.00013865298623869112, |
|
"eval_loss": 2.763354539871216, |
|
"eval_runtime": 265.9567, |
|
"eval_samples_per_second": 68.511, |
|
"eval_steps_per_second": 34.257, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.00016176181727847296, |
|
"grad_norm": 0.2417832463979721, |
|
"learning_rate": 0.00014, |
|
"loss": 3.0405, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.00018487064831825483, |
|
"grad_norm": 0.2619391977787018, |
|
"learning_rate": 0.00016, |
|
"loss": 3.0379, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.00020797947935803667, |
|
"grad_norm": 0.2710992395877838, |
|
"learning_rate": 0.00018, |
|
"loss": 2.6747, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.00020797947935803667, |
|
"eval_loss": 2.761340856552124, |
|
"eval_runtime": 265.6473, |
|
"eval_samples_per_second": 68.591, |
|
"eval_steps_per_second": 34.297, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.00023108831039781854, |
|
"grad_norm": 0.24306856095790863, |
|
"learning_rate": 0.0002, |
|
"loss": 3.0043, |
|
"step": 10 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 10, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 3, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 52296291975168.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|