|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.021194673072167863, |
|
"eval_steps": 13, |
|
"global_step": 150, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00014129782048111908, |
|
"eval_loss": NaN, |
|
"eval_runtime": 235.7619, |
|
"eval_samples_per_second": 50.559, |
|
"eval_steps_per_second": 6.32, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0004238934614433572, |
|
"grad_norm": NaN, |
|
"learning_rate": 3e-05, |
|
"loss": 0.0, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0008477869228867144, |
|
"grad_norm": NaN, |
|
"learning_rate": 6e-05, |
|
"loss": 0.0, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0012716803843300717, |
|
"grad_norm": NaN, |
|
"learning_rate": 9e-05, |
|
"loss": 0.0, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0016955738457734289, |
|
"grad_norm": NaN, |
|
"learning_rate": 9.994965332706573e-05, |
|
"loss": 0.0, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.001836871666254548, |
|
"eval_loss": NaN, |
|
"eval_runtime": 235.922, |
|
"eval_samples_per_second": 50.525, |
|
"eval_steps_per_second": 6.316, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.002119467307216786, |
|
"grad_norm": NaN, |
|
"learning_rate": 9.968561049466214e-05, |
|
"loss": 0.0, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0025433607686601435, |
|
"grad_norm": NaN, |
|
"learning_rate": 9.919647942993148e-05, |
|
"loss": 0.0, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.002967254230103501, |
|
"grad_norm": NaN, |
|
"learning_rate": 9.848447601883435e-05, |
|
"loss": 0.0, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.0033911476915468577, |
|
"grad_norm": NaN, |
|
"learning_rate": 9.755282581475769e-05, |
|
"loss": 0.0, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.003673743332509096, |
|
"eval_loss": NaN, |
|
"eval_runtime": 235.9223, |
|
"eval_samples_per_second": 50.525, |
|
"eval_steps_per_second": 6.316, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.003815041152990215, |
|
"grad_norm": NaN, |
|
"learning_rate": 9.640574942595196e-05, |
|
"loss": 0.0, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.004238934614433572, |
|
"grad_norm": NaN, |
|
"learning_rate": 9.504844339512095e-05, |
|
"loss": 0.0, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.00466282807587693, |
|
"grad_norm": NaN, |
|
"learning_rate": 9.348705665778478e-05, |
|
"loss": 0.0, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.005086721537320287, |
|
"grad_norm": NaN, |
|
"learning_rate": 9.172866268606513e-05, |
|
"loss": 0.0, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.005510614998763644, |
|
"grad_norm": NaN, |
|
"learning_rate": 8.978122744408906e-05, |
|
"loss": 0.0, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.005510614998763644, |
|
"eval_loss": NaN, |
|
"eval_runtime": 235.965, |
|
"eval_samples_per_second": 50.516, |
|
"eval_steps_per_second": 6.314, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.005934508460207002, |
|
"grad_norm": NaN, |
|
"learning_rate": 8.765357330018056e-05, |
|
"loss": 0.0, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.006358401921650359, |
|
"grad_norm": NaN, |
|
"learning_rate": 8.535533905932738e-05, |
|
"loss": 0.0, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.006782295383093715, |
|
"grad_norm": NaN, |
|
"learning_rate": 8.289693629698564e-05, |
|
"loss": 0.0, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.007206188844537073, |
|
"grad_norm": NaN, |
|
"learning_rate": 8.0289502192041e-05, |
|
"loss": 0.0, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.007347486665018192, |
|
"eval_loss": NaN, |
|
"eval_runtime": 235.958, |
|
"eval_samples_per_second": 50.517, |
|
"eval_steps_per_second": 6.315, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.00763008230598043, |
|
"grad_norm": NaN, |
|
"learning_rate": 7.754484907260513e-05, |
|
"loss": 0.0, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.008053975767423787, |
|
"grad_norm": NaN, |
|
"learning_rate": 7.467541090321735e-05, |
|
"loss": 0.0, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.008477869228867145, |
|
"grad_norm": NaN, |
|
"learning_rate": 7.169418695587791e-05, |
|
"loss": 0.0, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.008901762690310502, |
|
"grad_norm": NaN, |
|
"learning_rate": 6.861468292009727e-05, |
|
"loss": 0.0, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.00918435833127274, |
|
"eval_loss": NaN, |
|
"eval_runtime": 235.8169, |
|
"eval_samples_per_second": 50.548, |
|
"eval_steps_per_second": 6.318, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.00932565615175386, |
|
"grad_norm": NaN, |
|
"learning_rate": 6.545084971874738e-05, |
|
"loss": 0.0, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.009749549613197217, |
|
"grad_norm": NaN, |
|
"learning_rate": 6.22170203068947e-05, |
|
"loss": 0.0, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.010173443074640574, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.8927844739931834e-05, |
|
"loss": 0.0, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.010597336536083931, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.559822380516539e-05, |
|
"loss": 0.0, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.011021229997527289, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.2243241517525754e-05, |
|
"loss": 0.0, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.011021229997527289, |
|
"eval_loss": NaN, |
|
"eval_runtime": 235.731, |
|
"eval_samples_per_second": 50.566, |
|
"eval_steps_per_second": 6.321, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.011445123458970646, |
|
"grad_norm": NaN, |
|
"learning_rate": 4.887809678520976e-05, |
|
"loss": 0.0, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.011869016920414003, |
|
"grad_norm": NaN, |
|
"learning_rate": 4.551803455482833e-05, |
|
"loss": 0.0, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.01229291038185736, |
|
"grad_norm": NaN, |
|
"learning_rate": 4.2178276747988446e-05, |
|
"loss": 0.0, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.012716803843300718, |
|
"grad_norm": NaN, |
|
"learning_rate": 3.887395330218429e-05, |
|
"loss": 0.0, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.012858101663781835, |
|
"eval_loss": NaN, |
|
"eval_runtime": 235.7733, |
|
"eval_samples_per_second": 50.557, |
|
"eval_steps_per_second": 6.32, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.013140697304744074, |
|
"grad_norm": NaN, |
|
"learning_rate": 3.562003362839914e-05, |
|
"loss": 0.0, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.01356459076618743, |
|
"grad_norm": NaN, |
|
"learning_rate": 3.243125879593286e-05, |
|
"loss": 0.0, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.013988484227630788, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.932207475167398e-05, |
|
"loss": 0.0, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.014412377689074145, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.630656687635007e-05, |
|
"loss": 0.0, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.014694973330036384, |
|
"eval_loss": NaN, |
|
"eval_runtime": 235.7296, |
|
"eval_samples_per_second": 50.566, |
|
"eval_steps_per_second": 6.321, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.014836271150517503, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.3398396174233178e-05, |
|
"loss": 0.0, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.01526016461196086, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.061073738537635e-05, |
|
"loss": 0.0, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.015684058073404217, |
|
"grad_norm": NaN, |
|
"learning_rate": 1.7956219300748793e-05, |
|
"loss": 0.0, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.016107951534847575, |
|
"grad_norm": NaN, |
|
"learning_rate": 1.544686755065677e-05, |
|
"loss": 0.0, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.016531844996290932, |
|
"grad_norm": NaN, |
|
"learning_rate": 1.3094050125632972e-05, |
|
"loss": 0.0, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.016531844996290932, |
|
"eval_loss": NaN, |
|
"eval_runtime": 235.7863, |
|
"eval_samples_per_second": 50.554, |
|
"eval_steps_per_second": 6.319, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.01695573845773429, |
|
"grad_norm": NaN, |
|
"learning_rate": 1.090842587659851e-05, |
|
"loss": 0.0, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.017379631919177647, |
|
"grad_norm": NaN, |
|
"learning_rate": 8.899896227604509e-06, |
|
"loss": 0.0, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.017803525380621004, |
|
"grad_norm": NaN, |
|
"learning_rate": 7.077560319906695e-06, |
|
"loss": 0.0, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.01822741884206436, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.449673790581611e-06, |
|
"loss": 0.0, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.01836871666254548, |
|
"eval_loss": NaN, |
|
"eval_runtime": 235.7866, |
|
"eval_samples_per_second": 50.554, |
|
"eval_steps_per_second": 6.319, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.01865131230350772, |
|
"grad_norm": NaN, |
|
"learning_rate": 4.023611372427471e-06, |
|
"loss": 0.0, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.019075205764951076, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.8058334845816213e-06, |
|
"loss": 0.0, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.019499099226394433, |
|
"grad_norm": NaN, |
|
"learning_rate": 1.8018569652073381e-06, |
|
"loss": 0.0, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.01992299268783779, |
|
"grad_norm": NaN, |
|
"learning_rate": 1.016230078838226e-06, |
|
"loss": 0.0, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.02020558832880003, |
|
"eval_loss": NaN, |
|
"eval_runtime": 235.8437, |
|
"eval_samples_per_second": 50.542, |
|
"eval_steps_per_second": 6.318, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.020346886149281148, |
|
"grad_norm": NaN, |
|
"learning_rate": 4.52511911603265e-07, |
|
"loss": 0.0, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.020770779610724505, |
|
"grad_norm": NaN, |
|
"learning_rate": 1.132562476771959e-07, |
|
"loss": 0.0, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.021194673072167863, |
|
"grad_norm": NaN, |
|
"learning_rate": 0.0, |
|
"loss": 0.0, |
|
"step": 150 |
|
} |
|
], |
|
"logging_steps": 3, |
|
"max_steps": 150, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 13, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.19972177870848e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|