{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.0008518903446748335,
  "eval_steps": 25,
  "global_step": 25,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 3.407561378699334e-05,
      "grad_norm": 2.5001614093780518,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 1.825,
      "step": 1
    },
    {
      "epoch": 3.407561378699334e-05,
      "eval_loss": 5.655045509338379,
      "eval_runtime": 5197.3091,
      "eval_samples_per_second": 2.378,
      "eval_steps_per_second": 1.189,
      "step": 1
    },
    {
      "epoch": 6.815122757398668e-05,
      "grad_norm": 2.1226208209991455,
      "learning_rate": 6.666666666666667e-05,
      "loss": 1.6844,
      "step": 2
    },
    {
      "epoch": 0.00010222684136098001,
      "grad_norm": 2.5686793327331543,
      "learning_rate": 0.0001,
      "loss": 2.1539,
      "step": 3
    },
    {
      "epoch": 0.00013630245514797336,
      "grad_norm": 4.603909015655518,
      "learning_rate": 9.99571699711836e-05,
      "loss": 4.0091,
      "step": 4
    },
    {
      "epoch": 0.0001703780689349667,
      "grad_norm": 2.2434098720550537,
      "learning_rate": 9.982876141412856e-05,
      "loss": 1.482,
      "step": 5
    },
    {
      "epoch": 0.00020445368272196002,
      "grad_norm": 2.6793839931488037,
      "learning_rate": 9.961501876182148e-05,
      "loss": 0.8997,
      "step": 6
    },
    {
      "epoch": 0.00023852929650895337,
      "grad_norm": 3.525517463684082,
      "learning_rate": 9.931634888554937e-05,
      "loss": 1.328,
      "step": 7
    },
    {
      "epoch": 0.0002726049102959467,
      "grad_norm": 4.246636867523193,
      "learning_rate": 9.893332032039701e-05,
      "loss": 1.3833,
      "step": 8
    },
    {
      "epoch": 0.00030668052408294006,
      "grad_norm": 5.746326923370361,
      "learning_rate": 9.846666218300807e-05,
      "loss": 1.1267,
      "step": 9
    },
    {
      "epoch": 0.0003407561378699334,
      "grad_norm": 5.541106700897217,
      "learning_rate": 9.791726278367022e-05,
      "loss": 0.8371,
      "step": 10
    },
    {
      "epoch": 0.0003748317516569267,
      "grad_norm": 4.492342472076416,
      "learning_rate": 9.728616793536588e-05,
      "loss": 0.8644,
      "step": 11
    },
    {
      "epoch": 0.00040890736544392005,
      "grad_norm": 5.070585250854492,
      "learning_rate": 9.657457896300791e-05,
      "loss": 0.5652,
      "step": 12
    },
    {
      "epoch": 0.0004429829792309134,
      "grad_norm": 3.305798292160034,
      "learning_rate": 9.578385041664925e-05,
      "loss": 0.5209,
      "step": 13
    },
    {
      "epoch": 0.00047705859301790674,
      "grad_norm": 1.7791708707809448,
      "learning_rate": 9.491548749301997e-05,
      "loss": 0.1586,
      "step": 14
    },
    {
      "epoch": 0.0005111342068049,
      "grad_norm": 4.680875301361084,
      "learning_rate": 9.397114317029975e-05,
      "loss": 0.5646,
      "step": 15
    },
    {
      "epoch": 0.0005452098205918934,
      "grad_norm": 3.8708016872406006,
      "learning_rate": 9.295261506157986e-05,
      "loss": 0.431,
      "step": 16
    },
    {
      "epoch": 0.0005792854343788867,
      "grad_norm": 4.305510520935059,
      "learning_rate": 9.186184199300464e-05,
      "loss": 0.4888,
      "step": 17
    },
    {
      "epoch": 0.0006133610481658801,
      "grad_norm": 4.049569606781006,
      "learning_rate": 9.070090031310558e-05,
      "loss": 0.2277,
      "step": 18
    },
    {
      "epoch": 0.0006474366619528734,
      "grad_norm": 3.324610948562622,
      "learning_rate": 8.947199994035401e-05,
      "loss": 0.2978,
      "step": 19
    },
    {
      "epoch": 0.0006815122757398668,
      "grad_norm": 3.7074697017669678,
      "learning_rate": 8.817748015645558e-05,
      "loss": 0.3028,
      "step": 20
    },
    {
      "epoch": 0.0007155878895268601,
      "grad_norm": 1.7295836210250854,
      "learning_rate": 8.681980515339464e-05,
      "loss": 0.0916,
      "step": 21
    },
    {
      "epoch": 0.0007496635033138534,
      "grad_norm": 2.569653272628784,
      "learning_rate": 8.540155934270471e-05,
      "loss": 0.3138,
      "step": 22
    },
    {
      "epoch": 0.0007837391171008468,
      "grad_norm": 2.379580020904541,
      "learning_rate": 8.392544243589427e-05,
      "loss": 0.2004,
      "step": 23
    },
    {
      "epoch": 0.0008178147308878401,
      "grad_norm": 2.292313814163208,
      "learning_rate": 8.239426430539243e-05,
      "loss": 0.1673,
      "step": 24
    },
    {
      "epoch": 0.0008518903446748335,
      "grad_norm": 2.342252731323242,
      "learning_rate": 8.081093963579707e-05,
      "loss": 0.2094,
      "step": 25
    },
    {
      "epoch": 0.0008518903446748335,
      "eval_loss": 0.17272759974002838,
      "eval_runtime": 5196.4661,
      "eval_samples_per_second": 2.378,
      "eval_steps_per_second": 1.189,
      "step": 25
    }
  ],
  "logging_steps": 1,
  "max_steps": 75,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 25,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 7.014178406321357e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}