{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.1432945499081444, "eval_steps": 500, "global_step": 1750, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0612369871402327, "grad_norm": 0.8098917283898412, "learning_rate": 9.996294850025658e-06, "loss": 1.0009, "step": 50 }, { "epoch": 0.1224739742804654, "grad_norm": 0.8351009158740302, "learning_rate": 9.985184891357165e-06, "loss": 0.9902, "step": 100 }, { "epoch": 0.1837109614206981, "grad_norm": 0.7857771081781793, "learning_rate": 9.96668658961975e-06, "loss": 0.972, "step": 150 }, { "epoch": 0.2449479485609308, "grad_norm": 0.7820326486942377, "learning_rate": 9.940827360406297e-06, "loss": 0.9727, "step": 200 }, { "epoch": 0.3061849357011635, "grad_norm": 0.7529490157497454, "learning_rate": 9.907645528645791e-06, "loss": 0.9652, "step": 250 }, { "epoch": 0.3674219228413962, "grad_norm": 0.8000880788190894, "learning_rate": 9.867190271803466e-06, "loss": 0.9661, "step": 300 }, { "epoch": 0.4286589099816289, "grad_norm": 0.7925477606605517, "learning_rate": 9.819521546996864e-06, "loss": 0.9599, "step": 350 }, { "epoch": 0.4898958971218616, "grad_norm": 0.7624902603510945, "learning_rate": 9.764710002135784e-06, "loss": 0.9547, "step": 400 }, { "epoch": 0.5511328842620943, "grad_norm": 0.7431039272381413, "learning_rate": 9.702836871217838e-06, "loss": 0.9466, "step": 450 }, { "epoch": 0.612369871402327, "grad_norm": 0.7574806145615771, "learning_rate": 9.633993853934803e-06, "loss": 0.9439, "step": 500 }, { "epoch": 0.6736068585425597, "grad_norm": 0.748375839743952, "learning_rate": 9.558282979768164e-06, "loss": 0.9429, "step": 550 }, { "epoch": 0.7348438456827924, "grad_norm": 0.7952459759871126, "learning_rate": 9.475816456775313e-06, "loss": 0.9366, "step": 600 }, { "epoch": 0.7960808328230251, "grad_norm": 0.7782924796375454, "learning_rate": 9.386716505290467e-06, "loss": 0.9356, "step": 650 }, { "epoch": 0.8573178199632578, "grad_norm": 0.7443095452962357, "learning_rate": 9.291115176786814e-06, "loss": 0.9304, "step": 700 }, { "epoch": 0.9185548071034905, "grad_norm": 0.7822224402996899, "learning_rate": 9.189154158168293e-06, "loss": 0.9339, "step": 750 }, { "epoch": 0.9797917942437232, "grad_norm": 0.7817236760492132, "learning_rate": 9.08098456178111e-06, "loss": 0.9295, "step": 800 }, { "epoch": 1.0410287813839558, "grad_norm": 0.7824573951031337, "learning_rate": 8.966766701456177e-06, "loss": 0.9029, "step": 850 }, { "epoch": 1.1022657685241886, "grad_norm": 0.7621342939491088, "learning_rate": 8.846669854914395e-06, "loss": 0.8871, "step": 900 }, { "epoch": 1.1635027556644213, "grad_norm": 0.7431557953761879, "learning_rate": 8.720872012886918e-06, "loss": 0.882, "step": 950 }, { "epoch": 1.224739742804654, "grad_norm": 0.3886245437993066, "learning_rate": 8.58955961532221e-06, "loss": 0.8864, "step": 1000 }, { "epoch": 1.2859767299448868, "grad_norm": 0.7536794211653792, "learning_rate": 8.452927275070858e-06, "loss": 0.8805, "step": 1050 }, { "epoch": 1.3472137170851193, "grad_norm": 0.7577244481174435, "learning_rate": 8.311177489457653e-06, "loss": 0.8855, "step": 1100 }, { "epoch": 1.408450704225352, "grad_norm": 0.777343889968524, "learning_rate": 8.164520340168404e-06, "loss": 0.8862, "step": 1150 }, { "epoch": 1.4696876913655847, "grad_norm": 0.736623306272446, "learning_rate": 8.013173181896283e-06, "loss": 0.8839, "step": 1200 }, { "epoch": 1.5309246785058175, "grad_norm": 0.7451843323741638, "learning_rate": 7.857360320209126e-06, "loss": 0.8738, "step": 1250 }, { "epoch": 1.5921616656460502, "grad_norm": 0.8023889052201314, "learning_rate": 7.700553608573257e-06, "loss": 0.8846, "step": 1300 }, { "epoch": 1.653398652786283, "grad_norm": 0.7595177276389167, "learning_rate": 7.536585976390474e-06, "loss": 0.8818, "step": 1350 }, { "epoch": 1.7146356399265157, "grad_norm": 0.7673121148739883, "learning_rate": 7.3688589716215555e-06, "loss": 0.8775, "step": 1400 }, { "epoch": 1.7758726270667484, "grad_norm": 0.7577794985624852, "learning_rate": 7.197621175749467e-06, "loss": 0.8738, "step": 1450 }, { "epoch": 1.8371096142069812, "grad_norm": 0.7927915981856463, "learning_rate": 7.023126373460202e-06, "loss": 0.8771, "step": 1500 }, { "epoch": 1.8983466013472137, "grad_norm": 0.7595080397715767, "learning_rate": 6.849210730229846e-06, "loss": 0.8775, "step": 1550 }, { "epoch": 1.9595835884874464, "grad_norm": 0.7976605652572579, "learning_rate": 6.669034296168855e-06, "loss": 0.8751, "step": 1600 }, { "epoch": 2.020820575627679, "grad_norm": 0.7958737424052013, "learning_rate": 6.486384253156014e-06, "loss": 0.8579, "step": 1650 }, { "epoch": 2.0820575627679117, "grad_norm": 0.8331069734198607, "learning_rate": 6.301531299512195e-06, "loss": 0.837, "step": 1700 }, { "epoch": 2.1432945499081444, "grad_norm": 0.7511629331953945, "learning_rate": 6.11474939840398e-06, "loss": 0.8327, "step": 1750 } ], "logging_steps": 50, "max_steps": 4080, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 250, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 534961795891200.0, "train_batch_size": 48, "trial_name": null, "trial_params": null }