|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 21249, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.023530519083250975, |
|
"grad_norm": 1.6875, |
|
"learning_rate": 4.882347404583745e-05, |
|
"loss": 0.9427, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.04706103816650195, |
|
"grad_norm": 1.75, |
|
"learning_rate": 4.76469480916749e-05, |
|
"loss": 0.8108, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.07059155724975293, |
|
"grad_norm": 1.8671875, |
|
"learning_rate": 4.647042213751235e-05, |
|
"loss": 0.7777, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.0941220763330039, |
|
"grad_norm": 1.6171875, |
|
"learning_rate": 4.52938961833498e-05, |
|
"loss": 0.7551, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.11765259541625488, |
|
"grad_norm": 1.4765625, |
|
"learning_rate": 4.411737022918726e-05, |
|
"loss": 0.7373, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.14118311449950585, |
|
"grad_norm": 1.6328125, |
|
"learning_rate": 4.294084427502471e-05, |
|
"loss": 0.727, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.16471363358275684, |
|
"grad_norm": 1.84375, |
|
"learning_rate": 4.1764318320862164e-05, |
|
"loss": 0.718, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.1882441526660078, |
|
"grad_norm": 1.7265625, |
|
"learning_rate": 4.0587792366699614e-05, |
|
"loss": 0.7153, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.2117746717492588, |
|
"grad_norm": 1.5625, |
|
"learning_rate": 3.9411266412537063e-05, |
|
"loss": 0.7067, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.23530519083250975, |
|
"grad_norm": 1.6796875, |
|
"learning_rate": 3.823474045837451e-05, |
|
"loss": 0.7037, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.2588357099157607, |
|
"grad_norm": 1.8203125, |
|
"learning_rate": 3.705821450421196e-05, |
|
"loss": 0.6984, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.2823662289990117, |
|
"grad_norm": 1.609375, |
|
"learning_rate": 3.588168855004941e-05, |
|
"loss": 0.6954, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.3058967480822627, |
|
"grad_norm": 1.5859375, |
|
"learning_rate": 3.470516259588686e-05, |
|
"loss": 0.6948, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.3294272671655137, |
|
"grad_norm": 1.515625, |
|
"learning_rate": 3.352863664172432e-05, |
|
"loss": 0.6908, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.35295778624876467, |
|
"grad_norm": 1.6796875, |
|
"learning_rate": 3.235211068756177e-05, |
|
"loss": 0.6873, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.3764883053320156, |
|
"grad_norm": 1.7109375, |
|
"learning_rate": 3.117558473339922e-05, |
|
"loss": 0.6876, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.4000188244152666, |
|
"grad_norm": 1.5703125, |
|
"learning_rate": 2.999905877923667e-05, |
|
"loss": 0.6879, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.4235493434985176, |
|
"grad_norm": 1.5859375, |
|
"learning_rate": 2.882253282507412e-05, |
|
"loss": 0.6862, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.4470798625817686, |
|
"grad_norm": 1.8359375, |
|
"learning_rate": 2.7646006870911574e-05, |
|
"loss": 0.6829, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.4706103816650195, |
|
"grad_norm": 1.6640625, |
|
"learning_rate": 2.6469480916749024e-05, |
|
"loss": 0.6852, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.4941409007482705, |
|
"grad_norm": 1.5, |
|
"learning_rate": 2.5292954962586474e-05, |
|
"loss": 0.681, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.5176714198315214, |
|
"grad_norm": 1.59375, |
|
"learning_rate": 2.4116429008423927e-05, |
|
"loss": 0.6798, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.5412019389147724, |
|
"grad_norm": 1.6640625, |
|
"learning_rate": 2.2939903054261376e-05, |
|
"loss": 0.6803, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.5647324579980234, |
|
"grad_norm": 1.6875, |
|
"learning_rate": 2.176337710009883e-05, |
|
"loss": 0.6799, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.5882629770812744, |
|
"grad_norm": 1.875, |
|
"learning_rate": 2.0586851145936283e-05, |
|
"loss": 0.6761, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.6117934961645254, |
|
"grad_norm": 1.578125, |
|
"learning_rate": 1.9410325191773732e-05, |
|
"loss": 0.675, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.6353240152477764, |
|
"grad_norm": 1.7109375, |
|
"learning_rate": 1.8233799237611182e-05, |
|
"loss": 0.6785, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.6588545343310274, |
|
"grad_norm": 1.8515625, |
|
"learning_rate": 1.7057273283448632e-05, |
|
"loss": 0.6782, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.6823850534142784, |
|
"grad_norm": 1.5390625, |
|
"learning_rate": 1.5880747329286085e-05, |
|
"loss": 0.6761, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.7059155724975293, |
|
"grad_norm": 1.6015625, |
|
"learning_rate": 1.4704221375123536e-05, |
|
"loss": 0.6795, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.7294460915807802, |
|
"grad_norm": 1.8125, |
|
"learning_rate": 1.3527695420960988e-05, |
|
"loss": 0.6774, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.7529766106640312, |
|
"grad_norm": 1.5546875, |
|
"learning_rate": 1.2351169466798439e-05, |
|
"loss": 0.6777, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.7765071297472822, |
|
"grad_norm": 1.5859375, |
|
"learning_rate": 1.1174643512635889e-05, |
|
"loss": 0.6746, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.8000376488305332, |
|
"grad_norm": 1.5234375, |
|
"learning_rate": 9.998117558473342e-06, |
|
"loss": 0.6782, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.8235681679137842, |
|
"grad_norm": 1.5546875, |
|
"learning_rate": 8.821591604310792e-06, |
|
"loss": 0.6776, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.8470986869970352, |
|
"grad_norm": 1.546875, |
|
"learning_rate": 7.645065650148241e-06, |
|
"loss": 0.6782, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.8706292060802862, |
|
"grad_norm": 1.609375, |
|
"learning_rate": 6.468539695985694e-06, |
|
"loss": 0.6754, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.8941597251635371, |
|
"grad_norm": 1.515625, |
|
"learning_rate": 5.292013741823145e-06, |
|
"loss": 0.6757, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.9176902442467881, |
|
"grad_norm": 1.6015625, |
|
"learning_rate": 4.1154877876605964e-06, |
|
"loss": 0.6765, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.941220763330039, |
|
"grad_norm": 1.5625, |
|
"learning_rate": 2.938961833498047e-06, |
|
"loss": 0.6747, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.96475128241329, |
|
"grad_norm": 1.6171875, |
|
"learning_rate": 1.7624358793354984e-06, |
|
"loss": 0.677, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.988281801496541, |
|
"grad_norm": 1.65625, |
|
"learning_rate": 5.859099251729494e-07, |
|
"loss": 0.6804, |
|
"step": 21000 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 21249, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 9.787078596423629e+17, |
|
"train_batch_size": 48, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|