{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.05841974587410545,
  "eval_steps": 5,
  "global_step": 50,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0011683949174821088,
      "grad_norm": 0.5629591941833496,
      "learning_rate": 2e-05,
      "loss": 1.6259,
      "step": 1
    },
    {
      "epoch": 0.0011683949174821088,
      "eval_loss": 1.7091846466064453,
      "eval_runtime": 32.7497,
      "eval_samples_per_second": 22.016,
      "eval_steps_per_second": 11.023,
      "step": 1
    },
    {
      "epoch": 0.0023367898349642177,
      "grad_norm": 0.4464726150035858,
      "learning_rate": 4e-05,
      "loss": 1.9928,
      "step": 2
    },
    {
      "epoch": 0.0035051847524463268,
      "grad_norm": 0.554092288017273,
      "learning_rate": 6e-05,
      "loss": 1.9481,
      "step": 3
    },
    {
      "epoch": 0.004673579669928435,
      "grad_norm": 0.5418124198913574,
      "learning_rate": 8e-05,
      "loss": 2.0157,
      "step": 4
    },
    {
      "epoch": 0.0058419745874105445,
      "grad_norm": 0.6429816484451294,
      "learning_rate": 0.0001,
      "loss": 1.9883,
      "step": 5
    },
    {
      "epoch": 0.0058419745874105445,
      "eval_loss": 1.6724804639816284,
      "eval_runtime": 31.1462,
      "eval_samples_per_second": 23.149,
      "eval_steps_per_second": 11.59,
      "step": 5
    },
    {
      "epoch": 0.0070103695048926535,
      "grad_norm": 0.7108197212219238,
      "learning_rate": 0.00012,
      "loss": 1.9129,
      "step": 6
    },
    {
      "epoch": 0.008178764422374763,
      "grad_norm": 0.6660429835319519,
      "learning_rate": 0.00014,
      "loss": 1.5668,
      "step": 7
    },
    {
      "epoch": 0.00934715933985687,
      "grad_norm": 0.7972020506858826,
      "learning_rate": 0.00016,
      "loss": 1.6182,
      "step": 8
    },
    {
      "epoch": 0.01051555425733898,
      "grad_norm": 0.9052525162696838,
      "learning_rate": 0.00018,
      "loss": 1.7629,
      "step": 9
    },
    {
      "epoch": 0.011683949174821089,
      "grad_norm": 0.6085521578788757,
      "learning_rate": 0.0002,
      "loss": 1.7345,
      "step": 10
    },
    {
      "epoch": 0.011683949174821089,
      "eval_loss": 1.4248594045639038,
      "eval_runtime": 31.1654,
      "eval_samples_per_second": 23.135,
      "eval_steps_per_second": 11.583,
      "step": 10
    },
    {
      "epoch": 0.012852344092303198,
      "grad_norm": 0.6540700793266296,
      "learning_rate": 0.0001996917333733128,
      "loss": 1.3627,
      "step": 11
    },
    {
      "epoch": 0.014020739009785307,
      "grad_norm": 1.1813312768936157,
      "learning_rate": 0.00019876883405951377,
      "loss": 1.4923,
      "step": 12
    },
    {
      "epoch": 0.015189133927267416,
      "grad_norm": 0.9703741669654846,
      "learning_rate": 0.00019723699203976766,
      "loss": 1.5092,
      "step": 13
    },
    {
      "epoch": 0.016357528844749527,
      "grad_norm": 1.16099214553833,
      "learning_rate": 0.00019510565162951537,
      "loss": 1.2979,
      "step": 14
    },
    {
      "epoch": 0.017525923762231634,
      "grad_norm": 1.7793681621551514,
      "learning_rate": 0.0001923879532511287,
      "loss": 1.8037,
      "step": 15
    },
    {
      "epoch": 0.017525923762231634,
      "eval_loss": 1.2205755710601807,
      "eval_runtime": 31.1965,
      "eval_samples_per_second": 23.112,
      "eval_steps_per_second": 11.572,
      "step": 15
    },
    {
      "epoch": 0.01869431867971374,
      "grad_norm": 1.3740328550338745,
      "learning_rate": 0.0001891006524188368,
      "loss": 1.2364,
      "step": 16
    },
    {
      "epoch": 0.019862713597195852,
      "grad_norm": 0.6489018201828003,
      "learning_rate": 0.00018526401643540922,
      "loss": 1.7165,
      "step": 17
    },
    {
      "epoch": 0.02103110851467796,
      "grad_norm": 1.0925291776657104,
      "learning_rate": 0.00018090169943749476,
      "loss": 1.4376,
      "step": 18
    },
    {
      "epoch": 0.02219950343216007,
      "grad_norm": 0.28496918082237244,
      "learning_rate": 0.0001760405965600031,
      "loss": 1.5726,
      "step": 19
    },
    {
      "epoch": 0.023367898349642178,
      "grad_norm": 0.37671753764152527,
      "learning_rate": 0.00017071067811865476,
      "loss": 1.1192,
      "step": 20
    },
    {
      "epoch": 0.023367898349642178,
      "eval_loss": 1.2007029056549072,
      "eval_runtime": 31.1968,
      "eval_samples_per_second": 23.111,
      "eval_steps_per_second": 11.572,
      "step": 20
    },
    {
      "epoch": 0.02453629326712429,
      "grad_norm": 0.3938184380531311,
      "learning_rate": 0.00016494480483301836,
      "loss": 1.4101,
      "step": 21
    },
    {
      "epoch": 0.025704688184606396,
      "grad_norm": 0.42164233326911926,
      "learning_rate": 0.00015877852522924732,
      "loss": 1.1861,
      "step": 22
    },
    {
      "epoch": 0.026873083102088507,
      "grad_norm": 0.3876625597476959,
      "learning_rate": 0.0001522498564715949,
      "loss": 1.3193,
      "step": 23
    },
    {
      "epoch": 0.028041478019570614,
      "grad_norm": 0.511690080165863,
      "learning_rate": 0.00014539904997395468,
      "loss": 1.1979,
      "step": 24
    },
    {
      "epoch": 0.029209872937052725,
      "grad_norm": 0.4257306456565857,
      "learning_rate": 0.000138268343236509,
      "loss": 1.4168,
      "step": 25
    },
    {
      "epoch": 0.029209872937052725,
      "eval_loss": 1.1946120262145996,
      "eval_runtime": 31.1806,
      "eval_samples_per_second": 23.123,
      "eval_steps_per_second": 11.578,
      "step": 25
    },
    {
      "epoch": 0.030378267854534832,
      "grad_norm": 0.3946686387062073,
      "learning_rate": 0.00013090169943749476,
      "loss": 1.3165,
      "step": 26
    },
    {
      "epoch": 0.03154666277201694,
      "grad_norm": 0.37801477313041687,
      "learning_rate": 0.00012334453638559057,
      "loss": 1.6097,
      "step": 27
    },
    {
      "epoch": 0.032715057689499054,
      "grad_norm": 0.3698787987232208,
      "learning_rate": 0.0001156434465040231,
      "loss": 1.4314,
      "step": 28
    },
    {
      "epoch": 0.03388345260698116,
      "grad_norm": 0.36553719639778137,
      "learning_rate": 0.0001078459095727845,
      "loss": 1.6258,
      "step": 29
    },
    {
      "epoch": 0.03505184752446327,
      "grad_norm": 0.3180171847343445,
      "learning_rate": 0.0001,
      "loss": 1.8529,
      "step": 30
    },
    {
      "epoch": 0.03505184752446327,
      "eval_loss": 1.188832402229309,
      "eval_runtime": 31.1804,
      "eval_samples_per_second": 23.123,
      "eval_steps_per_second": 11.578,
      "step": 30
    },
    {
      "epoch": 0.036220242441945376,
      "grad_norm": 0.3740488886833191,
      "learning_rate": 9.215409042721552e-05,
      "loss": 0.7608,
      "step": 31
    },
    {
      "epoch": 0.03738863735942748,
      "grad_norm": 0.3934629261493683,
      "learning_rate": 8.435655349597689e-05,
      "loss": 1.1501,
      "step": 32
    },
    {
      "epoch": 0.0385570322769096,
      "grad_norm": 0.3714420199394226,
      "learning_rate": 7.66554636144095e-05,
      "loss": 1.2233,
      "step": 33
    },
    {
      "epoch": 0.039725427194391705,
      "grad_norm": 0.3141462206840515,
      "learning_rate": 6.909830056250527e-05,
      "loss": 1.3308,
      "step": 34
    },
    {
      "epoch": 0.04089382211187381,
      "grad_norm": 0.3738103210926056,
      "learning_rate": 6.173165676349103e-05,
      "loss": 1.5121,
      "step": 35
    },
    {
      "epoch": 0.04089382211187381,
      "eval_loss": 1.1856106519699097,
      "eval_runtime": 31.1551,
      "eval_samples_per_second": 23.142,
      "eval_steps_per_second": 11.587,
      "step": 35
    },
    {
      "epoch": 0.04206221702935592,
      "grad_norm": 0.3965364992618561,
      "learning_rate": 5.4600950026045326e-05,
      "loss": 1.6638,
      "step": 36
    },
    {
      "epoch": 0.043230611946838034,
      "grad_norm": 0.40209704637527466,
      "learning_rate": 4.7750143528405126e-05,
      "loss": 1.2929,
      "step": 37
    },
    {
      "epoch": 0.04439900686432014,
      "grad_norm": 0.4041275084018707,
      "learning_rate": 4.12214747707527e-05,
      "loss": 1.3061,
      "step": 38
    },
    {
      "epoch": 0.04556740178180225,
      "grad_norm": 0.3122788965702057,
      "learning_rate": 3.5055195166981645e-05,
      "loss": 1.3712,
      "step": 39
    },
    {
      "epoch": 0.046735796699284356,
      "grad_norm": 0.2880614697933197,
      "learning_rate": 2.9289321881345254e-05,
      "loss": 1.7083,
      "step": 40
    },
    {
      "epoch": 0.046735796699284356,
      "eval_loss": 1.18394935131073,
      "eval_runtime": 31.17,
      "eval_samples_per_second": 23.131,
      "eval_steps_per_second": 11.582,
      "step": 40
    },
    {
      "epoch": 0.04790419161676647,
      "grad_norm": 0.4715648293495178,
      "learning_rate": 2.3959403439996907e-05,
      "loss": 1.2266,
      "step": 41
    },
    {
      "epoch": 0.04907258653424858,
      "grad_norm": 0.3626469075679779,
      "learning_rate": 1.9098300562505266e-05,
      "loss": 1.3755,
      "step": 42
    },
    {
      "epoch": 0.050240981451730685,
      "grad_norm": 0.3921823799610138,
      "learning_rate": 1.4735983564590783e-05,
      "loss": 1.3125,
      "step": 43
    },
    {
      "epoch": 0.05140937636921279,
      "grad_norm": 0.35432127118110657,
      "learning_rate": 1.0899347581163221e-05,
      "loss": 1.2351,
      "step": 44
    },
    {
      "epoch": 0.052577771286694906,
      "grad_norm": 0.4417038559913635,
      "learning_rate": 7.612046748871327e-06,
      "loss": 1.7417,
      "step": 45
    },
    {
      "epoch": 0.052577771286694906,
      "eval_loss": 1.1834783554077148,
      "eval_runtime": 31.2195,
      "eval_samples_per_second": 23.095,
      "eval_steps_per_second": 11.563,
      "step": 45
    },
    {
      "epoch": 0.053746166204177014,
      "grad_norm": 0.3598865866661072,
      "learning_rate": 4.8943483704846475e-06,
      "loss": 1.4514,
      "step": 46
    },
    {
      "epoch": 0.05491456112165912,
      "grad_norm": 0.43709179759025574,
      "learning_rate": 2.7630079602323442e-06,
      "loss": 1.5828,
      "step": 47
    },
    {
      "epoch": 0.05608295603914123,
      "grad_norm": 0.32742375135421753,
      "learning_rate": 1.231165940486234e-06,
      "loss": 1.5362,
      "step": 48
    },
    {
      "epoch": 0.057251350956623336,
      "grad_norm": 0.31574079394340515,
      "learning_rate": 3.0826662668720364e-07,
      "loss": 1.5458,
      "step": 49
    },
    {
      "epoch": 0.05841974587410545,
      "grad_norm": 0.3892696797847748,
      "learning_rate": 0.0,
      "loss": 1.5221,
      "step": 50
    },
    {
      "epoch": 0.05841974587410545,
      "eval_loss": 1.1832040548324585,
      "eval_runtime": 31.1605,
      "eval_samples_per_second": 23.138,
      "eval_steps_per_second": 11.585,
      "step": 50
    }
  ],
  "logging_steps": 1,
  "max_steps": 50,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 10,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.3069219818110976e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}