|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9990586758707248, |
|
"eval_steps": 500, |
|
"global_step": 398, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.025101976780671477, |
|
"grad_norm": 11.570974273934945, |
|
"learning_rate": 5e-06, |
|
"loss": 0.9144, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.050203953561342954, |
|
"grad_norm": 3.2868828349894663, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7934, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.07530593034201444, |
|
"grad_norm": 1.3621580098977089, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7581, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.10040790712268591, |
|
"grad_norm": 1.1465743337293792, |
|
"learning_rate": 5e-06, |
|
"loss": 0.718, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.12550988390335738, |
|
"grad_norm": 1.8313362803176332, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6988, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.15061186068402888, |
|
"grad_norm": 0.8106493906617342, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6753, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.17571383746470035, |
|
"grad_norm": 1.3540466482658315, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6672, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.20081581424537182, |
|
"grad_norm": 0.5964949627381437, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6551, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.2259177910260433, |
|
"grad_norm": 0.5688152556152026, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6532, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.25101976780671476, |
|
"grad_norm": 0.6773755092137309, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6464, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.27612174458738625, |
|
"grad_norm": 0.5774859088110325, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6405, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.30122372136805775, |
|
"grad_norm": 0.5522163553259679, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6312, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.3263256981487292, |
|
"grad_norm": 0.5727079237004361, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6291, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.3514276749294007, |
|
"grad_norm": 0.5373258099366229, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6348, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3765296517100722, |
|
"grad_norm": 0.5642619689588081, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6252, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.40163162849074363, |
|
"grad_norm": 0.647603552543901, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6219, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.42673360527141513, |
|
"grad_norm": 0.6137422702457818, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6234, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.4518355820520866, |
|
"grad_norm": 0.5654990824228964, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6139, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.47693755883275807, |
|
"grad_norm": 0.7024949610501936, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6195, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.5020395356134295, |
|
"grad_norm": 0.5820569269038963, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6125, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.527141512394101, |
|
"grad_norm": 0.7020933693248848, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6086, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.5522434891747725, |
|
"grad_norm": 0.604199603689463, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6188, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.577345465955444, |
|
"grad_norm": 0.5647954363455674, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6032, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.6024474427361155, |
|
"grad_norm": 0.62231194375424, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6097, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.627549419516787, |
|
"grad_norm": 0.6954867326199731, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6062, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.6526513962974584, |
|
"grad_norm": 0.6683159364143533, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6081, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.6777533730781299, |
|
"grad_norm": 0.7460632306114617, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6051, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.7028553498588014, |
|
"grad_norm": 0.5069551815996469, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6035, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.7279573266394729, |
|
"grad_norm": 0.7002297529886885, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6037, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.7530593034201444, |
|
"grad_norm": 0.6956058201700761, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5979, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.7781612802008158, |
|
"grad_norm": 0.5991932647306116, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5981, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.8032632569814873, |
|
"grad_norm": 0.5275465592448361, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5933, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.8283652337621588, |
|
"grad_norm": 0.6290119559084576, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5945, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.8534672105428303, |
|
"grad_norm": 0.5130802873233022, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5936, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.8785691873235018, |
|
"grad_norm": 0.7421613361549775, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5948, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.9036711641041733, |
|
"grad_norm": 0.6153969499038726, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5824, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.9287731408848446, |
|
"grad_norm": 0.6034204487183606, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5847, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.9538751176655161, |
|
"grad_norm": 0.592301687922246, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5908, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.9789770944461876, |
|
"grad_norm": 0.6008165953019412, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5911, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.9990586758707248, |
|
"eval_loss": 0.5867129564285278, |
|
"eval_runtime": 271.3794, |
|
"eval_samples_per_second": 39.553, |
|
"eval_steps_per_second": 0.619, |
|
"step": 398 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1194, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 667083582996480.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|