|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9974554707379135, |
|
"eval_steps": 500, |
|
"global_step": 49, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.020356234096692113, |
|
"grad_norm": 0.1697569042444229, |
|
"learning_rate": 2e-05, |
|
"loss": 1.8967, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.04071246819338423, |
|
"grad_norm": 0.16455122828483582, |
|
"learning_rate": 4e-05, |
|
"loss": 1.9742, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.061068702290076333, |
|
"grad_norm": 0.1334070861339569, |
|
"learning_rate": 6e-05, |
|
"loss": 1.8263, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.08142493638676845, |
|
"grad_norm": 0.19785284996032715, |
|
"learning_rate": 8e-05, |
|
"loss": 1.8359, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.10178117048346055, |
|
"grad_norm": 0.16298870742321014, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8452, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.12213740458015267, |
|
"grad_norm": 0.1491611897945404, |
|
"learning_rate": 0.00012, |
|
"loss": 1.8554, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.14249363867684478, |
|
"grad_norm": 0.14379450678825378, |
|
"learning_rate": 0.00014, |
|
"loss": 1.7928, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.1628498727735369, |
|
"grad_norm": 0.11020653694868088, |
|
"learning_rate": 0.00016, |
|
"loss": 1.8638, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.183206106870229, |
|
"grad_norm": 0.20387399196624756, |
|
"learning_rate": 0.00018, |
|
"loss": 1.6861, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.2035623409669211, |
|
"grad_norm": 0.18219953775405884, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7959, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.22391857506361323, |
|
"grad_norm": 0.15169978141784668, |
|
"learning_rate": 0.00019967573081342103, |
|
"loss": 1.827, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.24427480916030533, |
|
"grad_norm": 0.08840368688106537, |
|
"learning_rate": 0.00019870502626379127, |
|
"loss": 1.8324, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.26463104325699743, |
|
"grad_norm": 0.08657640218734741, |
|
"learning_rate": 0.0001970941817426052, |
|
"loss": 1.8853, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.28498727735368956, |
|
"grad_norm": 0.10179179906845093, |
|
"learning_rate": 0.00019485364419471454, |
|
"loss": 1.8181, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.3053435114503817, |
|
"grad_norm": 0.08559463918209076, |
|
"learning_rate": 0.00019199794436588243, |
|
"loss": 1.9153, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.3256997455470738, |
|
"grad_norm": 0.1002177819609642, |
|
"learning_rate": 0.000188545602565321, |
|
"loss": 1.869, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.3460559796437659, |
|
"grad_norm": 0.08648160099983215, |
|
"learning_rate": 0.0001845190085543795, |
|
"loss": 1.7103, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.366412213740458, |
|
"grad_norm": 0.0936996191740036, |
|
"learning_rate": 0.00017994427634035015, |
|
"loss": 1.906, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.38676844783715014, |
|
"grad_norm": 0.09296493977308273, |
|
"learning_rate": 0.00017485107481711012, |
|
"loss": 1.8598, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.4071246819338422, |
|
"grad_norm": 0.08220034092664719, |
|
"learning_rate": 0.00016927243535095997, |
|
"loss": 1.7883, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.42748091603053434, |
|
"grad_norm": 0.08138112723827362, |
|
"learning_rate": 0.00016324453755953773, |
|
"loss": 1.839, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.44783715012722647, |
|
"grad_norm": 0.089788056910038, |
|
"learning_rate": 0.00015680647467311557, |
|
"loss": 1.9511, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.4681933842239186, |
|
"grad_norm": 0.08236150443553925, |
|
"learning_rate": 0.00015000000000000001, |
|
"loss": 1.7003, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.48854961832061067, |
|
"grad_norm": 0.0797291249036789, |
|
"learning_rate": 0.00014286925614030542, |
|
"loss": 1.8304, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.5089058524173028, |
|
"grad_norm": 0.11056578904390335, |
|
"learning_rate": 0.00013546048870425356, |
|
"loss": 1.8363, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.5292620865139949, |
|
"grad_norm": 0.08862095326185226, |
|
"learning_rate": 0.0001278217463916453, |
|
"loss": 1.8398, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.549618320610687, |
|
"grad_norm": 0.07724796235561371, |
|
"learning_rate": 0.00012000256937760445, |
|
"loss": 1.8102, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.5699745547073791, |
|
"grad_norm": 0.08631068468093872, |
|
"learning_rate": 0.0001120536680255323, |
|
"loss": 1.7571, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.5903307888040712, |
|
"grad_norm": 0.0852205753326416, |
|
"learning_rate": 0.00010402659401094152, |
|
"loss": 1.7623, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.6106870229007634, |
|
"grad_norm": 0.09490972757339478, |
|
"learning_rate": 9.597340598905852e-05, |
|
"loss": 1.7686, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.6310432569974554, |
|
"grad_norm": 0.0830993726849556, |
|
"learning_rate": 8.79463319744677e-05, |
|
"loss": 1.8706, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.6513994910941476, |
|
"grad_norm": 0.1042318046092987, |
|
"learning_rate": 7.999743062239557e-05, |
|
"loss": 1.8053, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.6717557251908397, |
|
"grad_norm": 0.07309404015541077, |
|
"learning_rate": 7.217825360835473e-05, |
|
"loss": 1.7835, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.6921119592875318, |
|
"grad_norm": 0.07798247784376144, |
|
"learning_rate": 6.453951129574644e-05, |
|
"loss": 1.8351, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.712468193384224, |
|
"grad_norm": 0.10470892488956451, |
|
"learning_rate": 5.713074385969457e-05, |
|
"loss": 1.6869, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.732824427480916, |
|
"grad_norm": 0.062327221035957336, |
|
"learning_rate": 5.000000000000002e-05, |
|
"loss": 1.8249, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.7531806615776081, |
|
"grad_norm": 0.09498722851276398, |
|
"learning_rate": 4.3193525326884435e-05, |
|
"loss": 1.6566, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.7735368956743003, |
|
"grad_norm": 0.07665014266967773, |
|
"learning_rate": 3.675546244046228e-05, |
|
"loss": 1.766, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.7938931297709924, |
|
"grad_norm": 0.07265973836183548, |
|
"learning_rate": 3.072756464904006e-05, |
|
"loss": 1.789, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.8142493638676844, |
|
"grad_norm": 0.07454241812229156, |
|
"learning_rate": 2.514892518288988e-05, |
|
"loss": 1.831, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.8346055979643766, |
|
"grad_norm": 0.07916758209466934, |
|
"learning_rate": 2.0055723659649904e-05, |
|
"loss": 1.8673, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.8549618320610687, |
|
"grad_norm": 0.0858256071805954, |
|
"learning_rate": 1.5480991445620542e-05, |
|
"loss": 1.6331, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.8753180661577609, |
|
"grad_norm": 0.09055773913860321, |
|
"learning_rate": 1.1454397434679021e-05, |
|
"loss": 1.6856, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.8956743002544529, |
|
"grad_norm": 0.08801382780075073, |
|
"learning_rate": 8.002055634117578e-06, |
|
"loss": 1.7838, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.916030534351145, |
|
"grad_norm": 0.08115601539611816, |
|
"learning_rate": 5.146355805285452e-06, |
|
"loss": 1.7345, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.9363867684478372, |
|
"grad_norm": 0.07437321543693542, |
|
"learning_rate": 2.905818257394799e-06, |
|
"loss": 1.84, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.9567430025445293, |
|
"grad_norm": 0.08822711557149887, |
|
"learning_rate": 1.2949737362087156e-06, |
|
"loss": 1.6885, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.9770992366412213, |
|
"grad_norm": 0.09608830511569977, |
|
"learning_rate": 3.2426918657900704e-07, |
|
"loss": 1.6825, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.9974554707379135, |
|
"grad_norm": 0.07894308120012283, |
|
"learning_rate": 0.0, |
|
"loss": 1.6886, |
|
"step": 49 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 49, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 9.32140082998018e+17, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|