|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.0033419977720014855, |
|
"eval_steps": 3, |
|
"global_step": 9, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0003713330857779428, |
|
"grad_norm": 0.9089969992637634, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6876, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0003713330857779428, |
|
"eval_loss": 0.4146934747695923, |
|
"eval_runtime": 37.185, |
|
"eval_samples_per_second": 30.496, |
|
"eval_steps_per_second": 15.248, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0007426661715558856, |
|
"grad_norm": 1.0030620098114014, |
|
"learning_rate": 4e-05, |
|
"loss": 1.5185, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0011139992573338284, |
|
"grad_norm": 1.4879279136657715, |
|
"learning_rate": 6e-05, |
|
"loss": 2.5859, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0011139992573338284, |
|
"eval_loss": 0.41284868121147156, |
|
"eval_runtime": 37.1943, |
|
"eval_samples_per_second": 30.489, |
|
"eval_steps_per_second": 15.244, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0014853323431117712, |
|
"grad_norm": 0.9057229161262512, |
|
"learning_rate": 8e-05, |
|
"loss": 1.4247, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.001856665428889714, |
|
"grad_norm": 0.7766861319541931, |
|
"learning_rate": 0.0001, |
|
"loss": 1.363, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0022279985146676567, |
|
"grad_norm": 1.2344207763671875, |
|
"learning_rate": 0.00012, |
|
"loss": 1.5412, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0022279985146676567, |
|
"eval_loss": 0.3825955390930176, |
|
"eval_runtime": 37.478, |
|
"eval_samples_per_second": 30.258, |
|
"eval_steps_per_second": 15.129, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0025993316004456, |
|
"grad_norm": 1.7376594543457031, |
|
"learning_rate": 0.00014, |
|
"loss": 1.6128, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0029706646862235424, |
|
"grad_norm": 1.4749916791915894, |
|
"learning_rate": 0.00016, |
|
"loss": 1.1024, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0033419977720014855, |
|
"grad_norm": 1.9655250310897827, |
|
"learning_rate": 0.00018, |
|
"loss": 1.2391, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0033419977720014855, |
|
"eval_loss": 0.22511792182922363, |
|
"eval_runtime": 37.5364, |
|
"eval_samples_per_second": 30.211, |
|
"eval_steps_per_second": 15.105, |
|
"step": 9 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 10, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 3, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1465874317836288.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|