{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 5478,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.054764512595837894,
"grad_norm": 3.8052456378936768,
"learning_rate": 1.8248175182481753e-05,
"loss": 2.2411,
"step": 100
},
{
"epoch": 0.10952902519167579,
"grad_norm": 2.646080255508423,
"learning_rate": 3.649635036496351e-05,
"loss": 1.8231,
"step": 200
},
{
"epoch": 0.16429353778751368,
"grad_norm": 2.700340986251831,
"learning_rate": 5.474452554744526e-05,
"loss": 1.7334,
"step": 300
},
{
"epoch": 0.21905805038335158,
"grad_norm": 3.046970844268799,
"learning_rate": 7.299270072992701e-05,
"loss": 1.6861,
"step": 400
},
{
"epoch": 0.2738225629791895,
"grad_norm": 2.5365347862243652,
"learning_rate": 9.124087591240877e-05,
"loss": 1.6711,
"step": 500
},
{
"epoch": 0.32858707557502737,
"grad_norm": 2.219165802001953,
"learning_rate": 9.997255186358079e-05,
"loss": 1.6651,
"step": 600
},
{
"epoch": 0.3833515881708653,
"grad_norm": 2.6104276180267334,
"learning_rate": 9.976563458663239e-05,
"loss": 1.5924,
"step": 700
},
{
"epoch": 0.43811610076670315,
"grad_norm": 2.4602837562561035,
"learning_rate": 9.93567000457336e-05,
"loss": 1.5787,
"step": 800
},
{
"epoch": 0.4928806133625411,
"grad_norm": 2.33683705329895,
"learning_rate": 9.874740825864108e-05,
"loss": 1.5269,
"step": 900
},
{
"epoch": 0.547645125958379,
"grad_norm": 3.0263447761535645,
"learning_rate": 9.794023256786919e-05,
"loss": 1.5251,
"step": 1000
},
{
"epoch": 0.6024096385542169,
"grad_norm": 2.4210712909698486,
"learning_rate": 9.693844960047051e-05,
"loss": 1.4886,
"step": 1100
},
{
"epoch": 0.6571741511500547,
"grad_norm": 2.1745235919952393,
"learning_rate": 9.574612596698522e-05,
"loss": 1.4715,
"step": 1200
},
{
"epoch": 0.7119386637458927,
"grad_norm": 2.216663360595703,
"learning_rate": 9.436810175355263e-05,
"loss": 1.4191,
"step": 1300
},
{
"epoch": 0.7667031763417306,
"grad_norm": 2.010982036590576,
"learning_rate": 9.280997087419733e-05,
"loss": 1.4411,
"step": 1400
},
{
"epoch": 0.8214676889375685,
"grad_norm": 2.0168168544769287,
"learning_rate": 9.107805836304658e-05,
"loss": 1.4264,
"step": 1500
},
{
"epoch": 0.8762322015334063,
"grad_norm": 2.2413294315338135,
"learning_rate": 8.91793946986587e-05,
"loss": 1.4056,
"step": 1600
},
{
"epoch": 0.9309967141292442,
"grad_norm": 2.189241409301758,
"learning_rate": 8.712168726468965e-05,
"loss": 1.368,
"step": 1700
},
{
"epoch": 0.9857612267250822,
"grad_norm": 2.0814454555511475,
"learning_rate": 8.491328906274937e-05,
"loss": 1.3536,
"step": 1800
},
{
"epoch": 1.04052573932092,
"grad_norm": 2.4284451007843018,
"learning_rate": 8.256316480445412e-05,
"loss": 1.1463,
"step": 1900
},
{
"epoch": 1.095290251916758,
"grad_norm": 2.5235214233398438,
"learning_rate": 8.008085452031985e-05,
"loss": 1.0484,
"step": 2000
},
{
"epoch": 1.1500547645125958,
"grad_norm": 2.3191304206848145,
"learning_rate": 7.747643483322187e-05,
"loss": 1.0878,
"step": 2100
},
{
"epoch": 1.2048192771084336,
"grad_norm": 2.206143856048584,
"learning_rate": 7.476047805362569e-05,
"loss": 1.0593,
"step": 2200
},
{
"epoch": 1.2595837897042717,
"grad_norm": 2.5182549953460693,
"learning_rate": 7.194400926263763e-05,
"loss": 1.0407,
"step": 2300
},
{
"epoch": 1.3143483023001095,
"grad_norm": 2.556648015975952,
"learning_rate": 6.903846155709005e-05,
"loss": 1.0629,
"step": 2400
},
{
"epoch": 1.3691128148959475,
"grad_norm": 2.1211650371551514,
"learning_rate": 6.605562963833925e-05,
"loss": 1.0505,
"step": 2500
},
{
"epoch": 1.4238773274917853,
"grad_norm": 2.1135342121124268,
"learning_rate": 6.300762193317518e-05,
"loss": 0.9912,
"step": 2600
},
{
"epoch": 1.4786418400876231,
"grad_norm": 2.554298162460327,
"learning_rate": 5.9906811441203135e-05,
"loss": 1.0007,
"step": 2700
},
{
"epoch": 1.5334063526834611,
"grad_norm": 1.823740839958191,
"learning_rate": 5.67657855082255e-05,
"loss": 0.9865,
"step": 2800
},
{
"epoch": 1.588170865279299,
"grad_norm": 1.8818488121032715,
"learning_rate": 5.359729472951246e-05,
"loss": 0.9782,
"step": 2900
},
{
"epoch": 1.642935377875137,
"grad_norm": 2.093569040298462,
"learning_rate": 5.041420119038218e-05,
"loss": 0.9874,
"step": 3000
},
{
"epoch": 1.6976998904709748,
"grad_norm": 2.0732619762420654,
"learning_rate": 4.72294262542015e-05,
"loss": 0.984,
"step": 3100
},
{
"epoch": 1.7524644030668126,
"grad_norm": 1.9700407981872559,
"learning_rate": 4.405589810975468e-05,
"loss": 0.9442,
"step": 3200
},
{
"epoch": 1.8072289156626506,
"grad_norm": 1.8880764245986938,
"learning_rate": 4.090649929090541e-05,
"loss": 0.9511,
"step": 3300
},
{
"epoch": 1.8619934282584885,
"grad_norm": 1.8169358968734741,
"learning_rate": 3.7794014381589125e-05,
"loss": 0.9123,
"step": 3400
},
{
"epoch": 1.9167579408543265,
"grad_norm": 2.062743663787842,
"learning_rate": 3.473107811842055e-05,
"loss": 0.9308,
"step": 3500
},
{
"epoch": 1.9715224534501643,
"grad_norm": 1.9522345066070557,
"learning_rate": 3.173012410158744e-05,
"loss": 0.9195,
"step": 3600
},
{
"epoch": 2.026286966046002,
"grad_norm": 1.4607131481170654,
"learning_rate": 2.8803334322232017e-05,
"loss": 0.7255,
"step": 3700
},
{
"epoch": 2.08105147864184,
"grad_norm": 2.2295525074005127,
"learning_rate": 2.596258971120737e-05,
"loss": 0.5014,
"step": 3800
},
{
"epoch": 2.135815991237678,
"grad_norm": 1.6707744598388672,
"learning_rate": 2.3219421909949735e-05,
"loss": 0.5206,
"step": 3900
},
{
"epoch": 2.190580503833516,
"grad_norm": 1.4230283498764038,
"learning_rate": 2.0584966459246906e-05,
"loss": 0.4994,
"step": 4000
},
{
"epoch": 2.245345016429354,
"grad_norm": 1.987844467163086,
"learning_rate": 1.8069917595926504e-05,
"loss": 0.4941,
"step": 4100
},
{
"epoch": 2.3001095290251916,
"grad_norm": 1.8899997472763062,
"learning_rate": 1.568448484096205e-05,
"loss": 0.4926,
"step": 4200
},
{
"epoch": 2.3548740416210294,
"grad_norm": 2.2514610290527344,
"learning_rate": 1.3438351555220874e-05,
"loss": 0.4809,
"step": 4300
},
{
"epoch": 2.4096385542168672,
"grad_norm": 1.9894077777862549,
"learning_rate": 1.1340635631092428e-05,
"loss": 0.4834,
"step": 4400
},
{
"epoch": 2.4644030668127055,
"grad_norm": 2.134925603866577,
"learning_rate": 9.399852479563775e-06,
"loss": 0.494,
"step": 4500
},
{
"epoch": 2.5191675794085433,
"grad_norm": 1.5433528423309326,
"learning_rate": 7.623880462991801e-06,
"loss": 0.475,
"step": 4600
},
{
"epoch": 2.573932092004381,
"grad_norm": 1.8533953428268433,
"learning_rate": 6.019928913893208e-06,
"loss": 0.4692,
"step": 4700
},
{
"epoch": 2.628696604600219,
"grad_norm": 1.9672038555145264,
"learning_rate": 4.594508869576164e-06,
"loss": 0.4806,
"step": 4800
},
{
"epoch": 2.6834611171960567,
"grad_norm": 1.722920536994934,
"learning_rate": 3.35340664141246e-06,
"loss": 0.4629,
"step": 4900
},
{
"epoch": 2.738225629791895,
"grad_norm": 2.070141315460205,
"learning_rate": 2.301660326042443e-06,
"loss": 0.4596,
"step": 5000
},
{
"epoch": 2.792990142387733,
"grad_norm": 1.5731868743896484,
"learning_rate": 1.4435393538625497e-06,
"loss": 0.4652,
"step": 5100
},
{
"epoch": 2.8477546549835706,
"grad_norm": 1.4046640396118164,
"learning_rate": 7.825271578155602e-07,
"loss": 0.4419,
"step": 5200
},
{
"epoch": 2.9025191675794084,
"grad_norm": 2.1482276916503906,
"learning_rate": 3.2130703283768106e-07,
"loss": 0.4663,
"step": 5300
},
{
"epoch": 2.9572836801752462,
"grad_norm": 1.5635905265808105,
"learning_rate": 6.175124336402217e-08,
"loss": 0.4757,
"step": 5400
}
],
"logging_steps": 100,
"max_steps": 5478,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.633057747449823e+18,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}