{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.026928471248246846,
  "eval_steps": 6,
  "global_step": 24,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0011220196353436186,
      "grad_norm": 0.6813791394233704,
      "learning_rate": 2e-05,
      "loss": 2.1705,
      "step": 1
    },
    {
      "epoch": 0.0011220196353436186,
      "eval_loss": 2.0811240673065186,
      "eval_runtime": 12.7097,
      "eval_samples_per_second": 29.584,
      "eval_steps_per_second": 14.792,
      "step": 1
    },
    {
      "epoch": 0.002244039270687237,
      "grad_norm": 0.4890865683555603,
      "learning_rate": 4e-05,
      "loss": 1.9746,
      "step": 2
    },
    {
      "epoch": 0.0033660589060308557,
      "grad_norm": 0.8481997847557068,
      "learning_rate": 6e-05,
      "loss": 2.2087,
      "step": 3
    },
    {
      "epoch": 0.004488078541374474,
      "grad_norm": 0.40311843156814575,
      "learning_rate": 8e-05,
      "loss": 2.0627,
      "step": 4
    },
    {
      "epoch": 0.005610098176718092,
      "grad_norm": 0.5440235733985901,
      "learning_rate": 0.0001,
      "loss": 2.1454,
      "step": 5
    },
    {
      "epoch": 0.006732117812061711,
      "grad_norm": 0.3888591229915619,
      "learning_rate": 0.00012,
      "loss": 2.366,
      "step": 6
    },
    {
      "epoch": 0.006732117812061711,
      "eval_loss": 2.0609025955200195,
      "eval_runtime": 12.7241,
      "eval_samples_per_second": 29.55,
      "eval_steps_per_second": 14.775,
      "step": 6
    },
    {
      "epoch": 0.00785413744740533,
      "grad_norm": 0.6480532288551331,
      "learning_rate": 0.00014,
      "loss": 2.5427,
      "step": 7
    },
    {
      "epoch": 0.008976157082748949,
      "grad_norm": 0.6572309136390686,
      "learning_rate": 0.00016,
      "loss": 2.3367,
      "step": 8
    },
    {
      "epoch": 0.010098176718092567,
      "grad_norm": 0.4551832973957062,
      "learning_rate": 0.00018,
      "loss": 1.9394,
      "step": 9
    },
    {
      "epoch": 0.011220196353436185,
      "grad_norm": 0.6103149652481079,
      "learning_rate": 0.0002,
      "loss": 2.0257,
      "step": 10
    },
    {
      "epoch": 0.012342215988779803,
      "grad_norm": 0.7382659912109375,
      "learning_rate": 0.00019749279121818235,
      "loss": 1.862,
      "step": 11
    },
    {
      "epoch": 0.013464235624123423,
      "grad_norm": 0.4550475478172302,
      "learning_rate": 0.0001900968867902419,
      "loss": 2.2914,
      "step": 12
    },
    {
      "epoch": 0.013464235624123423,
      "eval_loss": 1.8999028205871582,
      "eval_runtime": 13.0197,
      "eval_samples_per_second": 28.879,
      "eval_steps_per_second": 14.44,
      "step": 12
    },
    {
      "epoch": 0.014586255259467041,
      "grad_norm": 0.44409334659576416,
      "learning_rate": 0.000178183148246803,
      "loss": 1.6056,
      "step": 13
    },
    {
      "epoch": 0.01570827489481066,
      "grad_norm": 0.7820721864700317,
      "learning_rate": 0.00016234898018587337,
      "loss": 1.696,
      "step": 14
    },
    {
      "epoch": 0.016830294530154277,
      "grad_norm": 0.9811131358146667,
      "learning_rate": 0.00014338837391175582,
      "loss": 1.5621,
      "step": 15
    },
    {
      "epoch": 0.017952314165497897,
      "grad_norm": 0.8004341721534729,
      "learning_rate": 0.00012225209339563145,
      "loss": 1.8898,
      "step": 16
    },
    {
      "epoch": 0.019074333800841514,
      "grad_norm": 0.6704489588737488,
      "learning_rate": 0.0001,
      "loss": 1.8404,
      "step": 17
    },
    {
      "epoch": 0.020196353436185133,
      "grad_norm": 0.6039404273033142,
      "learning_rate": 7.774790660436858e-05,
      "loss": 1.8087,
      "step": 18
    },
    {
      "epoch": 0.020196353436185133,
      "eval_loss": 1.8163129091262817,
      "eval_runtime": 12.9895,
      "eval_samples_per_second": 28.946,
      "eval_steps_per_second": 14.473,
      "step": 18
    },
    {
      "epoch": 0.021318373071528753,
      "grad_norm": 0.5095430016517639,
      "learning_rate": 5.6611626088244194e-05,
      "loss": 1.6128,
      "step": 19
    },
    {
      "epoch": 0.02244039270687237,
      "grad_norm": 0.6485509872436523,
      "learning_rate": 3.7651019814126654e-05,
      "loss": 1.885,
      "step": 20
    },
    {
      "epoch": 0.02356241234221599,
      "grad_norm": 0.6329189538955688,
      "learning_rate": 2.181685175319702e-05,
      "loss": 1.9701,
      "step": 21
    },
    {
      "epoch": 0.024684431977559606,
      "grad_norm": 0.5708733201026917,
      "learning_rate": 9.903113209758096e-06,
      "loss": 2.4112,
      "step": 22
    },
    {
      "epoch": 0.025806451612903226,
      "grad_norm": 0.7266702055931091,
      "learning_rate": 2.5072087818176382e-06,
      "loss": 1.5506,
      "step": 23
    },
    {
      "epoch": 0.026928471248246846,
      "grad_norm": 0.7287759780883789,
      "learning_rate": 0.0,
      "loss": 2.0879,
      "step": 24
    },
    {
      "epoch": 0.026928471248246846,
      "eval_loss": 1.8048760890960693,
      "eval_runtime": 12.9656,
      "eval_samples_per_second": 29.0,
      "eval_steps_per_second": 14.5,
      "step": 24
    }
  ],
  "logging_steps": 1,
  "max_steps": 24,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 6,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1679736258428928.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}