{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.5842293906810037,
  "eval_steps": 500,
  "global_step": 250,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.14336917562724014,
      "grad_norm": 0.0,
      "learning_rate": 0.00019936113105200085,
      "loss": 25.3924,
      "step": 10
    },
    {
      "epoch": 0.2867383512544803,
      "grad_norm": 3.884701728820801,
      "learning_rate": 0.00019745268727865774,
      "loss": 8.0947,
      "step": 20
    },
    {
      "epoch": 0.43010752688172044,
      "grad_norm": 4.083174705505371,
      "learning_rate": 0.00019387338576538744,
      "loss": 8.0048,
      "step": 30
    },
    {
      "epoch": 0.5734767025089605,
      "grad_norm": 8.547293663024902,
      "learning_rate": 0.00018881364488135448,
      "loss": 8.2356,
      "step": 40
    },
    {
      "epoch": 0.7168458781362007,
      "grad_norm": 5.096670150756836,
      "learning_rate": 0.00018235325976284275,
      "loss": 7.6645,
      "step": 50
    },
    {
      "epoch": 0.8602150537634409,
      "grad_norm": 2.093186855316162,
      "learning_rate": 0.00017459411454241822,
      "loss": 6.8675,
      "step": 60
    },
    {
      "epoch": 1.003584229390681,
      "grad_norm": 6.611547946929932,
      "learning_rate": 0.00016565857557529566,
      "loss": 7.6999,
      "step": 70
    },
    {
      "epoch": 1.146953405017921,
      "grad_norm": 8.38244915008545,
      "learning_rate": 0.00015568756164881882,
      "loss": 5.8161,
      "step": 80
    },
    {
      "epoch": 1.2903225806451613,
      "grad_norm": 7.428552150726318,
      "learning_rate": 0.00014483832160900326,
      "loss": 5.076,
      "step": 90
    },
    {
      "epoch": 1.4336917562724014,
      "grad_norm": 9.673376083374023,
      "learning_rate": 0.00013328195445229868,
      "loss": 5.8924,
      "step": 100
    },
    {
      "epoch": 1.5770609318996416,
      "grad_norm": 9.069914817810059,
      "learning_rate": 0.00012120071099220549,
      "loss": 11.4614,
      "step": 110
    },
    {
      "epoch": 1.7204301075268817,
      "grad_norm": 0.6971492767333984,
      "learning_rate": 0.00010878511965507434,
      "loss": 6.7866,
      "step": 120
    },
    {
      "epoch": 1.863799283154122,
      "grad_norm": 5.74509334564209,
      "learning_rate": 9.623098173300654e-05,
      "loss": 5.4278,
      "step": 130
    },
    {
      "epoch": 2.007168458781362,
      "grad_norm": 6.082172870635986,
      "learning_rate": 8.497744108792429e-05,
      "loss": 6.2175,
      "step": 140
    },
    {
      "epoch": 2.150537634408602,
      "grad_norm": 9.069826126098633,
      "learning_rate": 7.270480644826749e-05,
      "loss": 3.6583,
      "step": 150
    },
    {
      "epoch": 2.293906810035842,
      "grad_norm": 5.0468926429748535,
      "learning_rate": 6.086263331627976e-05,
      "loss": 7.1881,
      "step": 160
    },
    {
      "epoch": 2.4372759856630823,
      "grad_norm": 8.837244987487793,
      "learning_rate": 4.9637679836423924e-05,
      "loss": 5.8196,
      "step": 170
    },
    {
      "epoch": 2.5806451612903225,
      "grad_norm": 6.962499618530273,
      "learning_rate": 3.920697023053949e-05,
      "loss": 6.0987,
      "step": 180
    },
    {
      "epoch": 2.7240143369175627,
      "grad_norm": 7.161827564239502,
      "learning_rate": 2.9735003020115092e-05,
      "loss": 6.0444,
      "step": 190
    },
    {
      "epoch": 2.867383512544803,
      "grad_norm": 4.73717737197876,
      "learning_rate": 2.137115678633811e-05,
      "loss": 5.409,
      "step": 200
    },
    {
      "epoch": 3.010752688172043,
      "grad_norm": 4.852635860443115,
      "learning_rate": 1.4247334380634792e-05,
      "loss": 3.4152,
      "step": 210
    },
    {
      "epoch": 3.154121863799283,
      "grad_norm": 0.0,
      "learning_rate": 8.475882737908248e-06,
      "loss": 2.9892,
      "step": 220
    },
    {
      "epoch": 3.2974910394265233,
      "grad_norm": 5.022617816925049,
      "learning_rate": 4.147821098262405e-06,
      "loss": 3.2551,
      "step": 230
    },
    {
      "epoch": 3.4408602150537635,
      "grad_norm": 8.784795761108398,
      "learning_rate": 1.3314055792131964e-06,
      "loss": 3.7666,
      "step": 240
    },
    {
      "epoch": 3.5842293906810037,
      "grad_norm": 13.812480926513672,
      "learning_rate": 7.105273594107953e-08,
      "loss": 7.4014,
      "step": 250
    }
  ],
  "logging_steps": 10,
  "max_steps": 250,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 20000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 553129082880000.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}