{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.0410958904109589,
  "eval_steps": 6,
  "global_step": 25,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0016438356164383563,
      "grad_norm": 18.2674560546875,
      "learning_rate": 1e-05,
      "loss": 7.2877,
      "step": 1
    },
    {
      "epoch": 0.0016438356164383563,
      "eval_loss": 7.202574729919434,
      "eval_runtime": 400.2611,
      "eval_samples_per_second": 1.921,
      "eval_steps_per_second": 0.482,
      "step": 1
    },
    {
      "epoch": 0.0032876712328767125,
      "grad_norm": 17.67232894897461,
      "learning_rate": 2e-05,
      "loss": 7.1103,
      "step": 2
    },
    {
      "epoch": 0.004931506849315068,
      "grad_norm": 18.076366424560547,
      "learning_rate": 3e-05,
      "loss": 6.7461,
      "step": 3
    },
    {
      "epoch": 0.006575342465753425,
      "grad_norm": 16.23063850402832,
      "learning_rate": 4e-05,
      "loss": 6.8808,
      "step": 4
    },
    {
      "epoch": 0.00821917808219178,
      "grad_norm": 15.608308792114258,
      "learning_rate": 5e-05,
      "loss": 6.2408,
      "step": 5
    },
    {
      "epoch": 0.009863013698630137,
      "grad_norm": 15.950782775878906,
      "learning_rate": 6e-05,
      "loss": 5.2182,
      "step": 6
    },
    {
      "epoch": 0.009863013698630137,
      "eval_loss": 3.6735994815826416,
      "eval_runtime": 401.6303,
      "eval_samples_per_second": 1.915,
      "eval_steps_per_second": 0.481,
      "step": 6
    },
    {
      "epoch": 0.011506849315068493,
      "grad_norm": 17.16571044921875,
      "learning_rate": 7e-05,
      "loss": 3.5523,
      "step": 7
    },
    {
      "epoch": 0.01315068493150685,
      "grad_norm": 15.576335906982422,
      "learning_rate": 8e-05,
      "loss": 2.116,
      "step": 8
    },
    {
      "epoch": 0.014794520547945205,
      "grad_norm": 8.430649757385254,
      "learning_rate": 9e-05,
      "loss": 1.4004,
      "step": 9
    },
    {
      "epoch": 0.01643835616438356,
      "grad_norm": 4.160114288330078,
      "learning_rate": 0.0001,
      "loss": 0.9875,
      "step": 10
    },
    {
      "epoch": 0.01808219178082192,
      "grad_norm": 4.697726726531982,
      "learning_rate": 9.98458666866564e-05,
      "loss": 0.8455,
      "step": 11
    },
    {
      "epoch": 0.019726027397260273,
      "grad_norm": 3.292222499847412,
      "learning_rate": 9.938441702975689e-05,
      "loss": 0.7364,
      "step": 12
    },
    {
      "epoch": 0.019726027397260273,
      "eval_loss": 0.734883189201355,
      "eval_runtime": 401.7356,
      "eval_samples_per_second": 1.914,
      "eval_steps_per_second": 0.48,
      "step": 12
    },
    {
      "epoch": 0.021369863013698632,
      "grad_norm": 3.199315071105957,
      "learning_rate": 9.861849601988383e-05,
      "loss": 0.7187,
      "step": 13
    },
    {
      "epoch": 0.023013698630136987,
      "grad_norm": 3.5907278060913086,
      "learning_rate": 9.755282581475769e-05,
      "loss": 0.7173,
      "step": 14
    },
    {
      "epoch": 0.024657534246575342,
      "grad_norm": 9.821634292602539,
      "learning_rate": 9.619397662556435e-05,
      "loss": 0.9381,
      "step": 15
    },
    {
      "epoch": 0.0263013698630137,
      "grad_norm": 3.7522168159484863,
      "learning_rate": 9.45503262094184e-05,
      "loss": 0.744,
      "step": 16
    },
    {
      "epoch": 0.027945205479452055,
      "grad_norm": 2.0294668674468994,
      "learning_rate": 9.263200821770461e-05,
      "loss": 0.6902,
      "step": 17
    },
    {
      "epoch": 0.02958904109589041,
      "grad_norm": 2.3452353477478027,
      "learning_rate": 9.045084971874738e-05,
      "loss": 0.729,
      "step": 18
    },
    {
      "epoch": 0.02958904109589041,
      "eval_loss": 0.7356500029563904,
      "eval_runtime": 401.8552,
      "eval_samples_per_second": 1.914,
      "eval_steps_per_second": 0.48,
      "step": 18
    },
    {
      "epoch": 0.03123287671232877,
      "grad_norm": 3.9628705978393555,
      "learning_rate": 8.802029828000156e-05,
      "loss": 0.7545,
      "step": 19
    },
    {
      "epoch": 0.03287671232876712,
      "grad_norm": 1.9698278903961182,
      "learning_rate": 8.535533905932738e-05,
      "loss": 0.6791,
      "step": 20
    },
    {
      "epoch": 0.03452054794520548,
      "grad_norm": 1.0390818119049072,
      "learning_rate": 8.247240241650918e-05,
      "loss": 0.6828,
      "step": 21
    },
    {
      "epoch": 0.03616438356164384,
      "grad_norm": 3.9402551651000977,
      "learning_rate": 7.938926261462366e-05,
      "loss": 0.7803,
      "step": 22
    },
    {
      "epoch": 0.03780821917808219,
      "grad_norm": 6.37379264831543,
      "learning_rate": 7.612492823579745e-05,
      "loss": 0.883,
      "step": 23
    },
    {
      "epoch": 0.03945205479452055,
      "grad_norm": 3.2149131298065186,
      "learning_rate": 7.269952498697734e-05,
      "loss": 0.7417,
      "step": 24
    },
    {
      "epoch": 0.03945205479452055,
      "eval_loss": 0.7024959921836853,
      "eval_runtime": 401.4676,
      "eval_samples_per_second": 1.915,
      "eval_steps_per_second": 0.481,
      "step": 24
    },
    {
      "epoch": 0.0410958904109589,
      "grad_norm": 2.5781383514404297,
      "learning_rate": 6.91341716182545e-05,
      "loss": 0.7425,
      "step": 25
    }
  ],
  "logging_steps": 1,
  "max_steps": 50,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 25,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.119015678246912e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}