{ "best_metric": null, "best_model_checkpoint": null, "epoch": 23.529411764705884, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.5882352941176471, "grad_norm": 0.20885063707828522, "learning_rate": 4.9980725906018074e-05, "loss": 0.8318, "num_input_tokens_seen": 121824, "step": 5 }, { "epoch": 1.1764705882352942, "grad_norm": 0.21794493496418, "learning_rate": 4.99229333433282e-05, "loss": 0.7891, "num_input_tokens_seen": 239760, "step": 10 }, { "epoch": 1.7647058823529411, "grad_norm": 0.20168891549110413, "learning_rate": 4.982671142387316e-05, "loss": 0.7678, "num_input_tokens_seen": 364912, "step": 15 }, { "epoch": 2.3529411764705883, "grad_norm": 0.20661190152168274, "learning_rate": 4.9692208514878444e-05, "loss": 0.728, "num_input_tokens_seen": 487440, "step": 20 }, { "epoch": 2.9411764705882355, "grad_norm": 0.2073347568511963, "learning_rate": 4.951963201008076e-05, "loss": 0.7364, "num_input_tokens_seen": 607888, "step": 25 }, { "epoch": 3.5294117647058822, "grad_norm": 0.19631442427635193, "learning_rate": 4.9309248009941914e-05, "loss": 0.7217, "num_input_tokens_seen": 728656, "step": 30 }, { "epoch": 4.117647058823529, "grad_norm": 0.22293810546398163, "learning_rate": 4.906138091134118e-05, "loss": 0.6901, "num_input_tokens_seen": 849216, "step": 35 }, { "epoch": 4.705882352941177, "grad_norm": 0.2156902402639389, "learning_rate": 4.877641290737884e-05, "loss": 0.6761, "num_input_tokens_seen": 971440, "step": 40 }, { "epoch": 5.294117647058823, "grad_norm": 0.22460030019283295, "learning_rate": 4.8454783398062106e-05, "loss": 0.6601, "num_input_tokens_seen": 1091264, "step": 45 }, { "epoch": 5.882352941176471, "grad_norm": 0.2591679096221924, "learning_rate": 4.8096988312782174e-05, "loss": 0.6439, "num_input_tokens_seen": 1211184, "step": 50 }, { "epoch": 6.470588235294118, "grad_norm": 0.26881489157676697, "learning_rate": 4.7703579345627035e-05, "loss": 0.6181, "num_input_tokens_seen": 1334112, "step": 55 }, { "epoch": 7.0588235294117645, "grad_norm": 0.3284054100513458, "learning_rate": 4.72751631047092e-05, "loss": 0.6145, "num_input_tokens_seen": 1454512, "step": 60 }, { "epoch": 7.647058823529412, "grad_norm": 0.2977285385131836, "learning_rate": 4.681240017681993e-05, "loss": 0.5834, "num_input_tokens_seen": 1576640, "step": 65 }, { "epoch": 8.235294117647058, "grad_norm": 0.3388771116733551, "learning_rate": 4.6316004108852305e-05, "loss": 0.5632, "num_input_tokens_seen": 1698624, "step": 70 }, { "epoch": 8.823529411764707, "grad_norm": 0.3815699815750122, "learning_rate": 4.5786740307563636e-05, "loss": 0.5549, "num_input_tokens_seen": 1818688, "step": 75 }, { "epoch": 9.411764705882353, "grad_norm": 0.37038519978523254, "learning_rate": 4.522542485937369e-05, "loss": 0.5151, "num_input_tokens_seen": 1942112, "step": 80 }, { "epoch": 10.0, "grad_norm": 0.4679271876811981, "learning_rate": 4.463292327201862e-05, "loss": 0.5147, "num_input_tokens_seen": 2061552, "step": 85 }, { "epoch": 10.588235294117647, "grad_norm": 0.4134647846221924, "learning_rate": 4.401014914000078e-05, "loss": 0.4635, "num_input_tokens_seen": 2185344, "step": 90 }, { "epoch": 11.176470588235293, "grad_norm": 0.45239707827568054, "learning_rate": 4.335806273589214e-05, "loss": 0.4585, "num_input_tokens_seen": 2306256, "step": 95 }, { "epoch": 11.764705882352942, "grad_norm": 0.5336123704910278, "learning_rate": 4.267766952966369e-05, "loss": 0.4291, "num_input_tokens_seen": 
2426592, "step": 100 }, { "epoch": 12.352941176470589, "grad_norm": 0.5823401212692261, "learning_rate": 4.197001863832355e-05, "loss": 0.3997, "num_input_tokens_seen": 2548672, "step": 105 }, { "epoch": 12.941176470588236, "grad_norm": 0.5824088454246521, "learning_rate": 4.123620120825459e-05, "loss": 0.3797, "num_input_tokens_seen": 2667984, "step": 110 }, { "epoch": 13.529411764705882, "grad_norm": 0.7273723483085632, "learning_rate": 4.047734873274586e-05, "loss": 0.3412, "num_input_tokens_seen": 2791904, "step": 115 }, { "epoch": 14.117647058823529, "grad_norm": 0.6384756565093994, "learning_rate": 3.969463130731183e-05, "loss": 0.3298, "num_input_tokens_seen": 2910560, "step": 120 }, { "epoch": 14.705882352941176, "grad_norm": 0.684781014919281, "learning_rate": 3.888925582549006e-05, "loss": 0.2863, "num_input_tokens_seen": 3034368, "step": 125 }, { "epoch": 15.294117647058824, "grad_norm": 0.7853628396987915, "learning_rate": 3.8062464117898724e-05, "loss": 0.2738, "num_input_tokens_seen": 3153984, "step": 130 }, { "epoch": 15.882352941176471, "grad_norm": 0.7987646460533142, "learning_rate": 3.721553103742388e-05, "loss": 0.2367, "num_input_tokens_seen": 3278336, "step": 135 }, { "epoch": 16.470588235294116, "grad_norm": 0.74590665102005, "learning_rate": 3.634976249348867e-05, "loss": 0.2189, "num_input_tokens_seen": 3398224, "step": 140 }, { "epoch": 17.058823529411764, "grad_norm": 0.8422712683677673, "learning_rate": 3.54664934384357e-05, "loss": 0.1971, "num_input_tokens_seen": 3519168, "step": 145 }, { "epoch": 17.647058823529413, "grad_norm": 0.8479442000389099, "learning_rate": 3.456708580912725e-05, "loss": 0.1705, "num_input_tokens_seen": 3641392, "step": 150 }, { "epoch": 18.235294117647058, "grad_norm": 0.8197467923164368, "learning_rate": 3.365292642693732e-05, "loss": 0.1454, "num_input_tokens_seen": 3764240, "step": 155 }, { "epoch": 18.823529411764707, "grad_norm": 1.0131207704544067, "learning_rate": 3.272542485937369e-05, "loss": 0.1387, "num_input_tokens_seen": 3882896, "step": 160 }, { "epoch": 19.41176470588235, "grad_norm": 0.858586311340332, "learning_rate": 3.178601124662686e-05, "loss": 0.1191, "num_input_tokens_seen": 4004560, "step": 165 }, { "epoch": 20.0, "grad_norm": 0.9852003455162048, "learning_rate": 3.083613409639764e-05, "loss": 0.1101, "num_input_tokens_seen": 4127360, "step": 170 }, { "epoch": 20.58823529411765, "grad_norm": 0.885619580745697, "learning_rate": 2.9877258050403212e-05, "loss": 0.0885, "num_input_tokens_seen": 4248640, "step": 175 }, { "epoch": 21.176470588235293, "grad_norm": 0.6304395794868469, "learning_rate": 2.8910861626005776e-05, "loss": 0.0865, "num_input_tokens_seen": 4369872, "step": 180 }, { "epoch": 21.764705882352942, "grad_norm": 0.7621514797210693, "learning_rate": 2.7938434936445945e-05, "loss": 0.0708, "num_input_tokens_seen": 4490688, "step": 185 }, { "epoch": 22.352941176470587, "grad_norm": 0.8263904452323914, "learning_rate": 2.6961477393196126e-05, "loss": 0.0715, "num_input_tokens_seen": 4612144, "step": 190 }, { "epoch": 22.941176470588236, "grad_norm": 0.5912930965423584, "learning_rate": 2.598149539397672e-05, "loss": 0.0584, "num_input_tokens_seen": 4733632, "step": 195 }, { "epoch": 23.529411764705884, "grad_norm": 0.6392534971237183, "learning_rate": 2.5e-05, "loss": 0.0517, "num_input_tokens_seen": 4854048, "step": 200 } ], "logging_steps": 5, "max_steps": 400, "num_input_tokens_seen": 4854048, "num_train_epochs": 50, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { 
"should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5.807606338473165e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }