{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9974554707379135, "eval_steps": 500, "global_step": 49, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.020356234096692113, "grad_norm": 0.1697569042444229, "learning_rate": 2e-05, "loss": 1.8967, "step": 1 }, { "epoch": 0.04071246819338423, "grad_norm": 0.16455122828483582, "learning_rate": 4e-05, "loss": 1.9742, "step": 2 }, { "epoch": 0.061068702290076333, "grad_norm": 0.1334070861339569, "learning_rate": 6e-05, "loss": 1.8263, "step": 3 }, { "epoch": 0.08142493638676845, "grad_norm": 0.19785284996032715, "learning_rate": 8e-05, "loss": 1.8359, "step": 4 }, { "epoch": 0.10178117048346055, "grad_norm": 0.16298870742321014, "learning_rate": 0.0001, "loss": 1.8452, "step": 5 }, { "epoch": 0.12213740458015267, "grad_norm": 0.1491611897945404, "learning_rate": 0.00012, "loss": 1.8554, "step": 6 }, { "epoch": 0.14249363867684478, "grad_norm": 0.14379450678825378, "learning_rate": 0.00014, "loss": 1.7928, "step": 7 }, { "epoch": 0.1628498727735369, "grad_norm": 0.11020653694868088, "learning_rate": 0.00016, "loss": 1.8638, "step": 8 }, { "epoch": 0.183206106870229, "grad_norm": 0.20387399196624756, "learning_rate": 0.00018, "loss": 1.6861, "step": 9 }, { "epoch": 0.2035623409669211, "grad_norm": 0.18219953775405884, "learning_rate": 0.0002, "loss": 1.7959, "step": 10 }, { "epoch": 0.22391857506361323, "grad_norm": 0.15169978141784668, "learning_rate": 0.00019967573081342103, "loss": 1.827, "step": 11 }, { "epoch": 0.24427480916030533, "grad_norm": 0.08840368688106537, "learning_rate": 0.00019870502626379127, "loss": 1.8324, "step": 12 }, { "epoch": 0.26463104325699743, "grad_norm": 0.08657640218734741, "learning_rate": 0.0001970941817426052, "loss": 1.8853, "step": 13 }, { "epoch": 0.28498727735368956, "grad_norm": 0.10179179906845093, "learning_rate": 0.00019485364419471454, "loss": 1.8181, "step": 14 }, { "epoch": 0.3053435114503817, "grad_norm": 0.08559463918209076, "learning_rate": 0.00019199794436588243, "loss": 1.9153, "step": 15 }, { "epoch": 0.3256997455470738, "grad_norm": 0.1002177819609642, "learning_rate": 0.000188545602565321, "loss": 1.869, "step": 16 }, { "epoch": 0.3460559796437659, "grad_norm": 0.08648160099983215, "learning_rate": 0.0001845190085543795, "loss": 1.7103, "step": 17 }, { "epoch": 0.366412213740458, "grad_norm": 0.0936996191740036, "learning_rate": 0.00017994427634035015, "loss": 1.906, "step": 18 }, { "epoch": 0.38676844783715014, "grad_norm": 0.09296493977308273, "learning_rate": 0.00017485107481711012, "loss": 1.8598, "step": 19 }, { "epoch": 0.4071246819338422, "grad_norm": 0.08220034092664719, "learning_rate": 0.00016927243535095997, "loss": 1.7883, "step": 20 }, { "epoch": 0.42748091603053434, "grad_norm": 0.08138112723827362, "learning_rate": 0.00016324453755953773, "loss": 1.839, "step": 21 }, { "epoch": 0.44783715012722647, "grad_norm": 0.089788056910038, "learning_rate": 0.00015680647467311557, "loss": 1.9511, "step": 22 }, { "epoch": 0.4681933842239186, "grad_norm": 0.08236150443553925, "learning_rate": 0.00015000000000000001, "loss": 1.7003, "step": 23 }, { "epoch": 0.48854961832061067, "grad_norm": 0.0797291249036789, "learning_rate": 0.00014286925614030542, "loss": 1.8304, "step": 24 }, { "epoch": 0.5089058524173028, "grad_norm": 0.11056578904390335, "learning_rate": 0.00013546048870425356, "loss": 1.8363, "step": 25 }, { "epoch": 0.5292620865139949, "grad_norm": 0.08862095326185226, "learning_rate": 0.0001278217463916453, "loss": 1.8398, "step": 26 }, { "epoch": 0.549618320610687, "grad_norm": 0.07724796235561371, "learning_rate": 0.00012000256937760445, "loss": 1.8102, "step": 27 }, { "epoch": 0.5699745547073791, "grad_norm": 0.08631068468093872, "learning_rate": 0.0001120536680255323, "loss": 1.7571, "step": 28 }, { "epoch": 0.5903307888040712, "grad_norm": 0.0852205753326416, "learning_rate": 0.00010402659401094152, "loss": 1.7623, "step": 29 }, { "epoch": 0.6106870229007634, "grad_norm": 0.09490972757339478, "learning_rate": 9.597340598905852e-05, "loss": 1.7686, "step": 30 }, { "epoch": 0.6310432569974554, "grad_norm": 0.0830993726849556, "learning_rate": 8.79463319744677e-05, "loss": 1.8706, "step": 31 }, { "epoch": 0.6513994910941476, "grad_norm": 0.1042318046092987, "learning_rate": 7.999743062239557e-05, "loss": 1.8053, "step": 32 }, { "epoch": 0.6717557251908397, "grad_norm": 0.07309404015541077, "learning_rate": 7.217825360835473e-05, "loss": 1.7835, "step": 33 }, { "epoch": 0.6921119592875318, "grad_norm": 0.07798247784376144, "learning_rate": 6.453951129574644e-05, "loss": 1.8351, "step": 34 }, { "epoch": 0.712468193384224, "grad_norm": 0.10470892488956451, "learning_rate": 5.713074385969457e-05, "loss": 1.6869, "step": 35 }, { "epoch": 0.732824427480916, "grad_norm": 0.062327221035957336, "learning_rate": 5.000000000000002e-05, "loss": 1.8249, "step": 36 }, { "epoch": 0.7531806615776081, "grad_norm": 0.09498722851276398, "learning_rate": 4.3193525326884435e-05, "loss": 1.6566, "step": 37 }, { "epoch": 0.7735368956743003, "grad_norm": 0.07665014266967773, "learning_rate": 3.675546244046228e-05, "loss": 1.766, "step": 38 }, { "epoch": 0.7938931297709924, "grad_norm": 0.07265973836183548, "learning_rate": 3.072756464904006e-05, "loss": 1.789, "step": 39 }, { "epoch": 0.8142493638676844, "grad_norm": 0.07454241812229156, "learning_rate": 2.514892518288988e-05, "loss": 1.831, "step": 40 }, { "epoch": 0.8346055979643766, "grad_norm": 0.07916758209466934, "learning_rate": 2.0055723659649904e-05, "loss": 1.8673, "step": 41 }, { "epoch": 0.8549618320610687, "grad_norm": 0.0858256071805954, "learning_rate": 1.5480991445620542e-05, "loss": 1.6331, "step": 42 }, { "epoch": 0.8753180661577609, "grad_norm": 0.09055773913860321, "learning_rate": 1.1454397434679021e-05, "loss": 1.6856, "step": 43 }, { "epoch": 0.8956743002544529, "grad_norm": 0.08801382780075073, "learning_rate": 8.002055634117578e-06, "loss": 1.7838, "step": 44 }, { "epoch": 0.916030534351145, "grad_norm": 0.08115601539611816, "learning_rate": 5.146355805285452e-06, "loss": 1.7345, "step": 45 }, { "epoch": 0.9363867684478372, "grad_norm": 0.07437321543693542, "learning_rate": 2.905818257394799e-06, "loss": 1.84, "step": 46 }, { "epoch": 0.9567430025445293, "grad_norm": 0.08822711557149887, "learning_rate": 1.2949737362087156e-06, "loss": 1.6885, "step": 47 }, { "epoch": 0.9770992366412213, "grad_norm": 0.09608830511569977, "learning_rate": 3.2426918657900704e-07, "loss": 1.6825, "step": 48 }, { "epoch": 0.9974554707379135, "grad_norm": 0.07894308120012283, "learning_rate": 0.0, "loss": 1.6886, "step": 49 } ], "logging_steps": 1, "max_steps": 49, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.32140082998018e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }