|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.005108491590145719,
  "eval_steps": 9,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 5.1084915901457196e-05,
      "eval_loss": 3.4734065532684326,
      "eval_runtime": 182.7499,
      "eval_samples_per_second": 180.405,
      "eval_steps_per_second": 22.555,
      "step": 1
    },
    {
      "epoch": 0.00015325474770437158,
      "grad_norm": 1.0560590028762817,
      "learning_rate": 1.5e-05,
      "loss": 3.4321,
      "step": 3
    },
    {
      "epoch": 0.00030650949540874317,
      "grad_norm": 1.1394942998886108,
      "learning_rate": 3e-05,
      "loss": 3.4168,
      "step": 6
    },
    {
      "epoch": 0.0004597642431131148,
      "grad_norm": 1.2241615056991577,
      "learning_rate": 4.5e-05,
      "loss": 3.366,
      "step": 9
    },
    {
      "epoch": 0.0004597642431131148,
      "eval_loss": 3.415942430496216,
      "eval_runtime": 183.8456,
      "eval_samples_per_second": 179.33,
      "eval_steps_per_second": 22.421,
      "step": 9
    },
    {
      "epoch": 0.0006130189908174863,
      "grad_norm": 1.198073148727417,
      "learning_rate": 4.993910125649561e-05,
      "loss": 3.4657,
      "step": 12
    },
    {
      "epoch": 0.000766273738521858,
      "grad_norm": 1.0100293159484863,
      "learning_rate": 4.962019382530521e-05,
      "loss": 3.2748,
      "step": 15
    },
    {
      "epoch": 0.0009195284862262296,
      "grad_norm": 0.9332877397537231,
      "learning_rate": 4.9031542398457974e-05,
      "loss": 3.246,
      "step": 18
    },
    {
      "epoch": 0.0009195284862262296,
      "eval_loss": 3.144437313079834,
      "eval_runtime": 184.0033,
      "eval_samples_per_second": 179.176,
      "eval_steps_per_second": 22.402,
      "step": 18
    },
    {
      "epoch": 0.0010727832339306011,
      "grad_norm": 0.9414787292480469,
      "learning_rate": 4.817959636416969e-05,
      "loss": 3.0904,
      "step": 21
    },
    {
      "epoch": 0.0012260379816349727,
      "grad_norm": 1.0662180185317993,
      "learning_rate": 4.707368982147318e-05,
      "loss": 2.9901,
      "step": 24
    },
    {
      "epoch": 0.0013792927293393444,
      "grad_norm": 0.9672358632087708,
      "learning_rate": 4.572593931387604e-05,
      "loss": 3.0054,
      "step": 27
    },
    {
      "epoch": 0.0013792927293393444,
      "eval_loss": 2.9016616344451904,
      "eval_runtime": 183.1925,
      "eval_samples_per_second": 179.969,
      "eval_steps_per_second": 22.501,
      "step": 27
    },
    {
      "epoch": 0.001532547477043716,
      "grad_norm": 0.9545574188232422,
      "learning_rate": 4.415111107797445e-05,
      "loss": 2.804,
      "step": 30
    },
    {
      "epoch": 0.0016858022247480875,
      "grad_norm": 0.8684502243995667,
      "learning_rate": 4.2366459261474933e-05,
      "loss": 2.7914,
      "step": 33
    },
    {
      "epoch": 0.0018390569724524592,
      "grad_norm": 0.8820082545280457,
      "learning_rate": 4.039153688314145e-05,
      "loss": 2.7448,
      "step": 36
    },
    {
      "epoch": 0.0018390569724524592,
      "eval_loss": 2.6998445987701416,
      "eval_runtime": 182.5973,
      "eval_samples_per_second": 180.556,
      "eval_steps_per_second": 22.574,
      "step": 36
    },
    {
      "epoch": 0.0019923117201568305,
      "grad_norm": 0.9635749459266663,
      "learning_rate": 3.824798160583012e-05,
      "loss": 2.6368,
      "step": 39
    },
    {
      "epoch": 0.0021455664678612023,
      "grad_norm": 1.0027378797531128,
      "learning_rate": 3.5959278669726935e-05,
      "loss": 2.6553,
      "step": 42
    },
    {
      "epoch": 0.002298821215565574,
      "grad_norm": 0.9840696454048157,
      "learning_rate": 3.355050358314172e-05,
      "loss": 2.6122,
      "step": 45
    },
    {
      "epoch": 0.002298821215565574,
      "eval_loss": 2.5452194213867188,
      "eval_runtime": 182.8932,
      "eval_samples_per_second": 180.264,
      "eval_steps_per_second": 22.538,
      "step": 45
    },
    {
      "epoch": 0.0024520759632699453,
      "grad_norm": 0.9413293600082397,
      "learning_rate": 3.104804738999169e-05,
      "loss": 2.5858,
      "step": 48
    },
    {
      "epoch": 0.002605330710974317,
      "grad_norm": 1.006203293800354,
      "learning_rate": 2.8479327524001636e-05,
      "loss": 2.4787,
      "step": 51
    },
    {
      "epoch": 0.002758585458678689,
      "grad_norm": 1.007853388786316,
      "learning_rate": 2.587248741756253e-05,
      "loss": 2.4619,
      "step": 54
    },
    {
      "epoch": 0.002758585458678689,
      "eval_loss": 2.424272298812866,
      "eval_runtime": 182.9352,
      "eval_samples_per_second": 180.222,
      "eval_steps_per_second": 22.533,
      "step": 54
    },
    {
      "epoch": 0.00291184020638306,
      "grad_norm": 1.1123751401901245,
      "learning_rate": 2.3256088156396868e-05,
      "loss": 2.3757,
      "step": 57
    },
    {
      "epoch": 0.003065094954087432,
      "grad_norm": 1.0468323230743408,
      "learning_rate": 2.0658795558326743e-05,
      "loss": 2.3774,
      "step": 60
    },
    {
      "epoch": 0.0032183497017918036,
      "grad_norm": 1.2713088989257812,
      "learning_rate": 1.8109066104575023e-05,
      "loss": 2.3676,
      "step": 63
    },
    {
      "epoch": 0.0032183497017918036,
      "eval_loss": 2.315412759780884,
      "eval_runtime": 183.2875,
      "eval_samples_per_second": 179.876,
      "eval_steps_per_second": 22.489,
      "step": 63
    },
    {
      "epoch": 0.003371604449496175,
      "grad_norm": 1.2722467184066772,
      "learning_rate": 1.56348351646022e-05,
      "loss": 2.3911,
      "step": 66
    },
    {
      "epoch": 0.0035248591972005467,
      "grad_norm": 1.2411410808563232,
      "learning_rate": 1.3263210930352737e-05,
      "loss": 2.2346,
      "step": 69
    },
    {
      "epoch": 0.0036781139449049184,
      "grad_norm": 1.144443392753601,
      "learning_rate": 1.1020177413231334e-05,
      "loss": 2.3083,
      "step": 72
    },
    {
      "epoch": 0.0036781139449049184,
      "eval_loss": 2.2295360565185547,
      "eval_runtime": 182.7601,
      "eval_samples_per_second": 180.395,
      "eval_steps_per_second": 22.554,
      "step": 72
    },
    {
      "epoch": 0.0038313686926092897,
      "grad_norm": 1.25135338306427,
      "learning_rate": 8.930309757836517e-06,
      "loss": 2.2523,
      "step": 75
    },
    {
      "epoch": 0.003984623440313661,
      "grad_norm": 1.035020112991333,
      "learning_rate": 7.016504991533726e-06,
      "loss": 2.2171,
      "step": 78
    },
    {
      "epoch": 0.004137878188018033,
      "grad_norm": 1.0950953960418701,
      "learning_rate": 5.299731159831953e-06,
      "loss": 2.1921,
      "step": 81
    },
    {
      "epoch": 0.004137878188018033,
      "eval_loss": 2.1888010501861572,
      "eval_runtime": 182.9199,
      "eval_samples_per_second": 180.237,
      "eval_steps_per_second": 22.534,
      "step": 81
    },
    {
      "epoch": 0.0042911329357224045,
      "grad_norm": 1.026548981666565,
      "learning_rate": 3.798797596089351e-06,
      "loss": 2.172,
      "step": 84
    },
    {
      "epoch": 0.004444387683426776,
      "grad_norm": 0.9955553412437439,
      "learning_rate": 2.5301488425208296e-06,
      "loss": 2.1886,
      "step": 87
    },
    {
      "epoch": 0.004597642431131148,
      "grad_norm": 1.0280815362930298,
      "learning_rate": 1.5076844803522922e-06,
      "loss": 2.1352,
      "step": 90
    },
    {
      "epoch": 0.004597642431131148,
      "eval_loss": 2.1757705211639404,
      "eval_runtime": 182.8294,
      "eval_samples_per_second": 180.327,
      "eval_steps_per_second": 22.546,
      "step": 90
    },
    {
      "epoch": 0.00475089717883552,
      "grad_norm": 1.1763354539871216,
      "learning_rate": 7.426068431000882e-07,
      "loss": 2.2136,
      "step": 93
    },
    {
      "epoch": 0.004904151926539891,
      "grad_norm": 1.103149652481079,
      "learning_rate": 2.4329828146074095e-07,
      "loss": 2.2201,
      "step": 96
    },
    {
      "epoch": 0.005057406674244262,
      "grad_norm": 1.0177910327911377,
      "learning_rate": 1.522932452260595e-08,
      "loss": 2.1461,
      "step": 99
    },
    {
      "epoch": 0.005057406674244262,
      "eval_loss": 2.1734840869903564,
      "eval_runtime": 182.7199,
      "eval_samples_per_second": 180.435,
      "eval_steps_per_second": 22.559,
      "step": 99
    }
  ],
  "logging_steps": 3,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 9,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2776751721676800.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}