|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.06506180871828236, |
|
"eval_steps": 5, |
|
"global_step": 50, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0013012361743656475, |
|
"grad_norm": 6.549399375915527, |
|
"learning_rate": 2e-05, |
|
"loss": 5.8478, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0013012361743656475, |
|
"eval_loss": 5.85587739944458, |
|
"eval_runtime": 19.5633, |
|
"eval_samples_per_second": 66.195, |
|
"eval_steps_per_second": 8.281, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.002602472348731295, |
|
"grad_norm": 5.998199939727783, |
|
"learning_rate": 4e-05, |
|
"loss": 5.5929, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.003903708523096942, |
|
"grad_norm": 6.909670829772949, |
|
"learning_rate": 6e-05, |
|
"loss": 6.0899, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.00520494469746259, |
|
"grad_norm": 5.808619976043701, |
|
"learning_rate": 8e-05, |
|
"loss": 5.6341, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.006506180871828237, |
|
"grad_norm": 6.830719947814941, |
|
"learning_rate": 0.0001, |
|
"loss": 5.4022, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.006506180871828237, |
|
"eval_loss": 5.209164619445801, |
|
"eval_runtime": 19.5573, |
|
"eval_samples_per_second": 66.216, |
|
"eval_steps_per_second": 8.283, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.007807417046193884, |
|
"grad_norm": 7.51235818862915, |
|
"learning_rate": 0.00012, |
|
"loss": 5.2535, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.009108653220559532, |
|
"grad_norm": 10.392406463623047, |
|
"learning_rate": 0.00014, |
|
"loss": 4.4599, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.01040988939492518, |
|
"grad_norm": 8.97028636932373, |
|
"learning_rate": 0.00016, |
|
"loss": 3.9407, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.011711125569290826, |
|
"grad_norm": 7.354924201965332, |
|
"learning_rate": 0.00018, |
|
"loss": 2.9676, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.013012361743656473, |
|
"grad_norm": 6.167208194732666, |
|
"learning_rate": 0.0002, |
|
"loss": 2.2725, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.013012361743656473, |
|
"eval_loss": 1.3080393075942993, |
|
"eval_runtime": 19.5559, |
|
"eval_samples_per_second": 66.22, |
|
"eval_steps_per_second": 8.284, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.014313597918022121, |
|
"grad_norm": 5.213840484619141, |
|
"learning_rate": 0.0001996917333733128, |
|
"loss": 1.3746, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.015614834092387769, |
|
"grad_norm": 4.336225509643555, |
|
"learning_rate": 0.00019876883405951377, |
|
"loss": 0.83, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.016916070266753416, |
|
"grad_norm": 4.607591152191162, |
|
"learning_rate": 0.00019723699203976766, |
|
"loss": 0.5198, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.018217306441119064, |
|
"grad_norm": 4.71095609664917, |
|
"learning_rate": 0.00019510565162951537, |
|
"loss": 0.5275, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.01951854261548471, |
|
"grad_norm": 3.6073062419891357, |
|
"learning_rate": 0.0001923879532511287, |
|
"loss": 0.2262, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.01951854261548471, |
|
"eval_loss": 0.16385002434253693, |
|
"eval_runtime": 19.5579, |
|
"eval_samples_per_second": 66.214, |
|
"eval_steps_per_second": 8.283, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.02081977878985036, |
|
"grad_norm": 3.0078632831573486, |
|
"learning_rate": 0.0001891006524188368, |
|
"loss": 0.1252, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.022121014964216004, |
|
"grad_norm": 6.549544334411621, |
|
"learning_rate": 0.00018526401643540922, |
|
"loss": 0.1807, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.02342225113858165, |
|
"grad_norm": 2.745868682861328, |
|
"learning_rate": 0.00018090169943749476, |
|
"loss": 0.0516, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0247234873129473, |
|
"grad_norm": 0.7366054654121399, |
|
"learning_rate": 0.0001760405965600031, |
|
"loss": 0.0241, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.026024723487312947, |
|
"grad_norm": 0.8767889738082886, |
|
"learning_rate": 0.00017071067811865476, |
|
"loss": 0.0172, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.026024723487312947, |
|
"eval_loss": 0.01193653978407383, |
|
"eval_runtime": 19.5552, |
|
"eval_samples_per_second": 66.223, |
|
"eval_steps_per_second": 8.284, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.027325959661678594, |
|
"grad_norm": 0.9785062074661255, |
|
"learning_rate": 0.00016494480483301836, |
|
"loss": 0.011, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.028627195836044242, |
|
"grad_norm": 0.13425800204277039, |
|
"learning_rate": 0.00015877852522924732, |
|
"loss": 0.0031, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.02992843201040989, |
|
"grad_norm": 0.11440292745828629, |
|
"learning_rate": 0.0001522498564715949, |
|
"loss": 0.0023, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.031229668184775537, |
|
"grad_norm": 0.16419219970703125, |
|
"learning_rate": 0.00014539904997395468, |
|
"loss": 0.0022, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.03253090435914118, |
|
"grad_norm": 0.27950215339660645, |
|
"learning_rate": 0.000138268343236509, |
|
"loss": 0.0034, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.03253090435914118, |
|
"eval_loss": 0.0018979853484779596, |
|
"eval_runtime": 19.5548, |
|
"eval_samples_per_second": 66.224, |
|
"eval_steps_per_second": 8.284, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.03383214053350683, |
|
"grad_norm": 0.05157754197716713, |
|
"learning_rate": 0.00013090169943749476, |
|
"loss": 0.0007, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.03513337670787248, |
|
"grad_norm": 0.05505343899130821, |
|
"learning_rate": 0.00012334453638559057, |
|
"loss": 0.001, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.03643461288223813, |
|
"grad_norm": 0.03492051735520363, |
|
"learning_rate": 0.0001156434465040231, |
|
"loss": 0.0005, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.03773584905660377, |
|
"grad_norm": 0.0410466231405735, |
|
"learning_rate": 0.0001078459095727845, |
|
"loss": 0.0007, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.03903708523096942, |
|
"grad_norm": 0.027093220502138138, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03903708523096942, |
|
"eval_loss": 0.000504123920109123, |
|
"eval_runtime": 19.5547, |
|
"eval_samples_per_second": 66.225, |
|
"eval_steps_per_second": 8.284, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04033832140533507, |
|
"grad_norm": 0.01767433062195778, |
|
"learning_rate": 9.215409042721552e-05, |
|
"loss": 0.0004, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.04163955757970072, |
|
"grad_norm": 0.010012276470661163, |
|
"learning_rate": 8.435655349597689e-05, |
|
"loss": 0.0002, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.04294079375406636, |
|
"grad_norm": 0.04254867136478424, |
|
"learning_rate": 7.66554636144095e-05, |
|
"loss": 0.0005, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.04424202992843201, |
|
"grad_norm": 0.005319009069353342, |
|
"learning_rate": 6.909830056250527e-05, |
|
"loss": 0.0002, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.04554326610279766, |
|
"grad_norm": 0.015941577032208443, |
|
"learning_rate": 6.173165676349103e-05, |
|
"loss": 0.0002, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.04554326610279766, |
|
"eval_loss": 0.00025833846302703023, |
|
"eval_runtime": 19.5515, |
|
"eval_samples_per_second": 66.235, |
|
"eval_steps_per_second": 8.286, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.0468445022771633, |
|
"grad_norm": 0.10623931884765625, |
|
"learning_rate": 5.4600950026045326e-05, |
|
"loss": 0.0012, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.048145738451528954, |
|
"grad_norm": 0.006040714215487242, |
|
"learning_rate": 4.7750143528405126e-05, |
|
"loss": 0.0002, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.0494469746258946, |
|
"grad_norm": 0.010647974908351898, |
|
"learning_rate": 4.12214747707527e-05, |
|
"loss": 0.0003, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.05074821080026025, |
|
"grad_norm": 0.003610523883253336, |
|
"learning_rate": 3.5055195166981645e-05, |
|
"loss": 0.0001, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.05204944697462589, |
|
"grad_norm": 0.0037193503230810165, |
|
"learning_rate": 2.9289321881345254e-05, |
|
"loss": 0.0002, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05204944697462589, |
|
"eval_loss": 0.00016716058598831296, |
|
"eval_runtime": 19.5673, |
|
"eval_samples_per_second": 66.182, |
|
"eval_steps_per_second": 8.279, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.053350683148991544, |
|
"grad_norm": 0.00527266226708889, |
|
"learning_rate": 2.3959403439996907e-05, |
|
"loss": 0.0002, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.05465191932335719, |
|
"grad_norm": 0.005722023546695709, |
|
"learning_rate": 1.9098300562505266e-05, |
|
"loss": 0.0002, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.05595315549772284, |
|
"grad_norm": 0.005843067076057196, |
|
"learning_rate": 1.4735983564590783e-05, |
|
"loss": 0.0002, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.057254391672088484, |
|
"grad_norm": 0.002936769975349307, |
|
"learning_rate": 1.0899347581163221e-05, |
|
"loss": 0.0001, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.05855562784645413, |
|
"grad_norm": 0.0033105313777923584, |
|
"learning_rate": 7.612046748871327e-06, |
|
"loss": 0.0001, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.05855562784645413, |
|
"eval_loss": 0.00015007508045528084, |
|
"eval_runtime": 19.5511, |
|
"eval_samples_per_second": 66.237, |
|
"eval_steps_per_second": 8.286, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.05985686402081978, |
|
"grad_norm": 0.0019124951213598251, |
|
"learning_rate": 4.8943483704846475e-06, |
|
"loss": 0.0001, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.06115810019518542, |
|
"grad_norm": 0.0029391671996563673, |
|
"learning_rate": 2.7630079602323442e-06, |
|
"loss": 0.0001, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.062459336369551074, |
|
"grad_norm": 0.002002465771511197, |
|
"learning_rate": 1.231165940486234e-06, |
|
"loss": 0.0001, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.06376057254391672, |
|
"grad_norm": 0.0030635118018835783, |
|
"learning_rate": 3.0826662668720364e-07, |
|
"loss": 0.0001, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.06506180871828236, |
|
"grad_norm": 0.0038037565536797047, |
|
"learning_rate": 0.0, |
|
"loss": 0.0001, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06506180871828236, |
|
"eval_loss": 0.0001471501891501248, |
|
"eval_runtime": 19.5581, |
|
"eval_samples_per_second": 66.213, |
|
"eval_steps_per_second": 8.283, |
|
"step": 50 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 50, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 70, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.28019470188544e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|