|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.1013299556681444, |
|
"eval_steps": 3, |
|
"global_step": 20, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00506649778340722, |
|
"grad_norm": 4.028384208679199, |
|
"learning_rate": 2e-05, |
|
"loss": 3.5544, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00506649778340722, |
|
"eval_loss": 3.5219967365264893, |
|
"eval_runtime": 17.0746, |
|
"eval_samples_per_second": 9.781, |
|
"eval_steps_per_second": 4.92, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01013299556681444, |
|
"grad_norm": 4.173046588897705, |
|
"learning_rate": 4e-05, |
|
"loss": 3.9016, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.015199493350221659, |
|
"grad_norm": 3.8153645992279053, |
|
"learning_rate": 6e-05, |
|
"loss": 3.3707, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.015199493350221659, |
|
"eval_loss": 3.4256696701049805, |
|
"eval_runtime": 17.0755, |
|
"eval_samples_per_second": 9.78, |
|
"eval_steps_per_second": 4.919, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.02026599113362888, |
|
"grad_norm": 3.7728078365325928, |
|
"learning_rate": 8e-05, |
|
"loss": 3.2427, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0253324889170361, |
|
"grad_norm": 4.0927228927612305, |
|
"learning_rate": 0.0001, |
|
"loss": 2.8313, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.030398986700443317, |
|
"grad_norm": 4.602127552032471, |
|
"learning_rate": 0.00012, |
|
"loss": 3.0358, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.030398986700443317, |
|
"eval_loss": 2.225193500518799, |
|
"eval_runtime": 17.159, |
|
"eval_samples_per_second": 9.733, |
|
"eval_steps_per_second": 4.895, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.03546548448385054, |
|
"grad_norm": 4.011180877685547, |
|
"learning_rate": 0.00014, |
|
"loss": 1.9538, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.04053198226725776, |
|
"grad_norm": 3.0159177780151367, |
|
"learning_rate": 0.00016, |
|
"loss": 1.4574, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.045598480050664976, |
|
"grad_norm": 3.2096781730651855, |
|
"learning_rate": 0.00018, |
|
"loss": 1.4471, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.045598480050664976, |
|
"eval_loss": 1.0822527408599854, |
|
"eval_runtime": 17.0804, |
|
"eval_samples_per_second": 9.777, |
|
"eval_steps_per_second": 4.918, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0506649778340722, |
|
"grad_norm": 3.9092917442321777, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0191, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05573147561747942, |
|
"grad_norm": 3.5135035514831543, |
|
"learning_rate": 0.00019781476007338058, |
|
"loss": 0.4289, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.060797973400886635, |
|
"grad_norm": 3.6494600772857666, |
|
"learning_rate": 0.0001913545457642601, |
|
"loss": 0.3012, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.060797973400886635, |
|
"eval_loss": 0.1743772327899933, |
|
"eval_runtime": 17.1197, |
|
"eval_samples_per_second": 9.755, |
|
"eval_steps_per_second": 4.907, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.06586447118429385, |
|
"grad_norm": 1.6839953660964966, |
|
"learning_rate": 0.00018090169943749476, |
|
"loss": 0.1816, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.07093096896770108, |
|
"grad_norm": 1.779843807220459, |
|
"learning_rate": 0.00016691306063588583, |
|
"loss": 0.1379, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.0759974667511083, |
|
"grad_norm": 3.1982781887054443, |
|
"learning_rate": 0.00015000000000000001, |
|
"loss": 0.2963, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0759974667511083, |
|
"eval_loss": 0.11545062810182571, |
|
"eval_runtime": 17.145, |
|
"eval_samples_per_second": 9.74, |
|
"eval_steps_per_second": 4.899, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.08106396453451552, |
|
"grad_norm": 0.8417478203773499, |
|
"learning_rate": 0.00013090169943749476, |
|
"loss": 0.0254, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.08613046231792273, |
|
"grad_norm": 1.9363985061645508, |
|
"learning_rate": 0.00011045284632676536, |
|
"loss": 0.1866, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.09119696010132995, |
|
"grad_norm": 1.8391631841659546, |
|
"learning_rate": 8.954715367323468e-05, |
|
"loss": 0.1333, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.09119696010132995, |
|
"eval_loss": 0.12381552159786224, |
|
"eval_runtime": 17.1973, |
|
"eval_samples_per_second": 9.711, |
|
"eval_steps_per_second": 4.884, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.09626345788473717, |
|
"grad_norm": 1.8057817220687866, |
|
"learning_rate": 6.909830056250527e-05, |
|
"loss": 0.1361, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.1013299556681444, |
|
"grad_norm": 1.080073595046997, |
|
"learning_rate": 5.000000000000002e-05, |
|
"loss": 0.0566, |
|
"step": 20 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 25, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 5, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.64437646753792e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|