|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.6644219977553311, |
|
"eval_steps": 500, |
|
"global_step": 296, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.444444444444445e-07, |
|
"loss": 1.6909, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.222222222222222e-06, |
|
"loss": 1.5942, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.444444444444444e-06, |
|
"loss": 1.2719, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 1.1349, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 8.888888888888888e-06, |
|
"loss": 1.0928, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.1111111111111113e-05, |
|
"loss": 1.0533, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 1.0332, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.555555555555556e-05, |
|
"loss": 1.011, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.7777777777777777e-05, |
|
"loss": 1.0103, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2e-05, |
|
"loss": 0.9977, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.9992290362407232e-05, |
|
"loss": 0.9846, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.9969173337331283e-05, |
|
"loss": 0.9936, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.9930684569549265e-05, |
|
"loss": 0.9827, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.9876883405951378e-05, |
|
"loss": 0.9814, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.9807852804032306e-05, |
|
"loss": 0.9739, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.9723699203976768e-05, |
|
"loss": 0.9794, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.9624552364536472e-05, |
|
"loss": 0.9697, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.9510565162951538e-05, |
|
"loss": 0.9789, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.9381913359224844e-05, |
|
"loss": 0.9644, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.9238795325112867e-05, |
|
"loss": 0.9623, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.9081431738250815e-05, |
|
"loss": 0.9618, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.891006524188368e-05, |
|
"loss": 0.9699, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.8724960070727974e-05, |
|
"loss": 0.9563, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.8526401643540924e-05, |
|
"loss": 0.96, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.8314696123025456e-05, |
|
"loss": 0.9571, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.8090169943749477e-05, |
|
"loss": 0.9595, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.785316930880745e-05, |
|
"loss": 0.9538, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.7604059656000313e-05, |
|
"loss": 0.9493, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.7343225094356857e-05, |
|
"loss": 0.9565, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.7071067811865477e-05, |
|
"loss": 0.9507, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.678800745532942e-05, |
|
"loss": 0.9539, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.6494480483301836e-05, |
|
"loss": 0.9507, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.6190939493098344e-05, |
|
"loss": 0.9514, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.5877852522924733e-05, |
|
"loss": 0.9511, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.5555702330196024e-05, |
|
"loss": 0.9508, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.5224985647159489e-05, |
|
"loss": 0.9547, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.4886212414969551e-05, |
|
"loss": 0.9508, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.4539904997395468e-05, |
|
"loss": 0.9483, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.4186597375374283e-05, |
|
"loss": 0.9501, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.3826834323650899e-05, |
|
"loss": 0.9355, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.346117057077493e-05, |
|
"loss": 0.9385, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.3090169943749475e-05, |
|
"loss": 0.948, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.2714404498650743e-05, |
|
"loss": 0.9408, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.2334453638559057e-05, |
|
"loss": 0.9478, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.1950903220161286e-05, |
|
"loss": 0.9463, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.156434465040231e-05, |
|
"loss": 0.9313, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.1175373974578378e-05, |
|
"loss": 0.9517, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.0784590957278452e-05, |
|
"loss": 0.9304, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.0392598157590687e-05, |
|
"loss": 0.9368, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9428, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 9.607401842409318e-06, |
|
"loss": 0.9369, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 9.215409042721553e-06, |
|
"loss": 0.9326, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.824626025421625e-06, |
|
"loss": 0.9394, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.43565534959769e-06, |
|
"loss": 0.9411, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 8.04909677983872e-06, |
|
"loss": 0.9348, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.66554636144095e-06, |
|
"loss": 0.9363, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 7.285595501349259e-06, |
|
"loss": 0.9414, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 6.909830056250527e-06, |
|
"loss": 0.9365, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 6.538829429225068e-06, |
|
"loss": 0.9197, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 6.173165676349103e-06, |
|
"loss": 0.9276, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 0.931494951248169, |
|
"eval_runtime": 205.9305, |
|
"eval_samples_per_second": 122.828, |
|
"eval_steps_per_second": 0.481, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"step": 296, |
|
"total_flos": 495601644994560.0, |
|
"train_loss": 0.9806337251856521, |
|
"train_runtime": 6774.4676, |
|
"train_samples_per_second": 33.668, |
|
"train_steps_per_second": 0.066 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 445, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"total_flos": 495601644994560.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|