|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.8928571428571429,
  "eval_steps": 5,
  "global_step": 50,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.017857142857142856,
      "eval_loss": 0.6113489270210266,
      "eval_runtime": 3.2137,
      "eval_samples_per_second": 29.561,
      "eval_steps_per_second": 3.734,
      "step": 1
    },
    {
      "epoch": 0.05357142857142857,
      "grad_norm": 0.8834459781646729,
      "learning_rate": 3e-05,
      "loss": 0.5885,
      "step": 3
    },
    {
      "epoch": 0.08928571428571429,
      "eval_loss": 0.4982052445411682,
      "eval_runtime": 2.6844,
      "eval_samples_per_second": 35.39,
      "eval_steps_per_second": 4.47,
      "step": 5
    },
    {
      "epoch": 0.10714285714285714,
      "grad_norm": 0.6803084015846252,
      "learning_rate": 6e-05,
      "loss": 0.4985,
      "step": 6
    },
    {
      "epoch": 0.16071428571428573,
      "grad_norm": 1.0455479621887207,
      "learning_rate": 9e-05,
      "loss": 0.4033,
      "step": 9
    },
    {
      "epoch": 0.17857142857142858,
      "eval_loss": 0.37860435247421265,
      "eval_runtime": 2.6972,
      "eval_samples_per_second": 35.222,
      "eval_steps_per_second": 4.449,
      "step": 10
    },
    {
      "epoch": 0.21428571428571427,
      "grad_norm": 0.5007634162902832,
      "learning_rate": 9.938441702975689e-05,
      "loss": 0.3331,
      "step": 12
    },
    {
      "epoch": 0.26785714285714285,
      "grad_norm": 0.4750365614891052,
      "learning_rate": 9.619397662556435e-05,
      "loss": 0.3576,
      "step": 15
    },
    {
      "epoch": 0.26785714285714285,
      "eval_loss": 0.334573894739151,
      "eval_runtime": 2.6977,
      "eval_samples_per_second": 35.215,
      "eval_steps_per_second": 4.448,
      "step": 15
    },
    {
      "epoch": 0.32142857142857145,
      "grad_norm": 0.47219088673591614,
      "learning_rate": 9.045084971874738e-05,
      "loss": 0.2991,
      "step": 18
    },
    {
      "epoch": 0.35714285714285715,
      "eval_loss": 0.3077447712421417,
      "eval_runtime": 2.7069,
      "eval_samples_per_second": 35.095,
      "eval_steps_per_second": 4.433,
      "step": 20
    },
    {
      "epoch": 0.375,
      "grad_norm": 0.3391444683074951,
      "learning_rate": 8.247240241650918e-05,
      "loss": 0.2959,
      "step": 21
    },
    {
      "epoch": 0.42857142857142855,
      "grad_norm": 0.41816258430480957,
      "learning_rate": 7.269952498697734e-05,
      "loss": 0.2976,
      "step": 24
    },
    {
      "epoch": 0.44642857142857145,
      "eval_loss": 0.3027045726776123,
      "eval_runtime": 2.7073,
      "eval_samples_per_second": 35.091,
      "eval_steps_per_second": 4.433,
      "step": 25
    },
    {
      "epoch": 0.48214285714285715,
      "grad_norm": 0.37434259057044983,
      "learning_rate": 6.167226819279528e-05,
      "loss": 0.2523,
      "step": 27
    },
    {
      "epoch": 0.5357142857142857,
      "grad_norm": 0.47235336899757385,
      "learning_rate": 5e-05,
      "loss": 0.2896,
      "step": 30
    },
    {
      "epoch": 0.5357142857142857,
      "eval_loss": 0.29465004801750183,
      "eval_runtime": 2.7171,
      "eval_samples_per_second": 34.964,
      "eval_steps_per_second": 4.417,
      "step": 30
    },
    {
      "epoch": 0.5892857142857143,
      "grad_norm": 0.3799128830432892,
      "learning_rate": 3.832773180720475e-05,
      "loss": 0.2806,
      "step": 33
    },
    {
      "epoch": 0.625,
      "eval_loss": 0.29076987504959106,
      "eval_runtime": 2.7183,
      "eval_samples_per_second": 34.948,
      "eval_steps_per_second": 4.415,
      "step": 35
    },
    {
      "epoch": 0.6428571428571429,
      "grad_norm": 0.3711701035499573,
      "learning_rate": 2.7300475013022663e-05,
      "loss": 0.2933,
      "step": 36
    },
    {
      "epoch": 0.6964285714285714,
      "grad_norm": 0.34898632764816284,
      "learning_rate": 1.7527597583490822e-05,
      "loss": 0.2671,
      "step": 39
    },
    {
      "epoch": 0.7142857142857143,
      "eval_loss": 0.28889867663383484,
      "eval_runtime": 2.708,
      "eval_samples_per_second": 35.081,
      "eval_steps_per_second": 4.431,
      "step": 40
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.3814017176628113,
      "learning_rate": 9.549150281252633e-06,
      "loss": 0.27,
      "step": 42
    },
    {
      "epoch": 0.8035714285714286,
      "grad_norm": 0.3966533839702606,
      "learning_rate": 3.8060233744356633e-06,
      "loss": 0.3033,
      "step": 45
    },
    {
      "epoch": 0.8035714285714286,
      "eval_loss": 0.2880026698112488,
      "eval_runtime": 2.7104,
      "eval_samples_per_second": 35.05,
      "eval_steps_per_second": 4.427,
      "step": 45
    },
    {
      "epoch": 0.8571428571428571,
      "grad_norm": 0.3969070613384247,
      "learning_rate": 6.15582970243117e-07,
      "loss": 0.2683,
      "step": 48
    },
    {
      "epoch": 0.8928571428571429,
      "eval_loss": 0.2877660393714905,
      "eval_runtime": 2.7107,
      "eval_samples_per_second": 35.047,
      "eval_steps_per_second": 4.427,
      "step": 50
    }
  ],
  "logging_steps": 3,
  "max_steps": 50,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 5,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.3607236534272e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}
|
|