{ "best_metric": 11.734221458435059, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 2.017699115044248, "eval_steps": 25, "global_step": 57, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.035398230088495575, "grad_norm": 0.031247906386852264, "learning_rate": 2.9999999999999997e-05, "loss": 11.7618, "step": 1 }, { "epoch": 0.035398230088495575, "eval_loss": 11.758525848388672, "eval_runtime": 0.0557, "eval_samples_per_second": 897.074, "eval_steps_per_second": 35.883, "step": 1 }, { "epoch": 0.07079646017699115, "grad_norm": 0.033341653645038605, "learning_rate": 5.9999999999999995e-05, "loss": 11.7604, "step": 2 }, { "epoch": 0.10619469026548672, "grad_norm": 0.03511160612106323, "learning_rate": 8.999999999999999e-05, "loss": 11.7606, "step": 3 }, { "epoch": 0.1415929203539823, "grad_norm": 0.04077626019716263, "learning_rate": 0.00011999999999999999, "loss": 11.7602, "step": 4 }, { "epoch": 0.17699115044247787, "grad_norm": 0.04330646991729736, "learning_rate": 0.00015, "loss": 11.7584, "step": 5 }, { "epoch": 0.21238938053097345, "grad_norm": 0.043747056275606155, "learning_rate": 0.00017999999999999998, "loss": 11.7597, "step": 6 }, { "epoch": 0.24778761061946902, "grad_norm": 0.06513168662786484, "learning_rate": 0.00020999999999999998, "loss": 11.7631, "step": 7 }, { "epoch": 0.2831858407079646, "grad_norm": 0.02857106737792492, "learning_rate": 0.00023999999999999998, "loss": 11.7625, "step": 8 }, { "epoch": 0.3185840707964602, "grad_norm": 0.0340174175798893, "learning_rate": 0.00027, "loss": 11.758, "step": 9 }, { "epoch": 0.35398230088495575, "grad_norm": 0.03516850247979164, "learning_rate": 0.0003, "loss": 11.7586, "step": 10 }, { "epoch": 0.3893805309734513, "grad_norm": 0.04234551265835762, "learning_rate": 0.000299665031793473, "loss": 11.7581, "step": 11 }, { "epoch": 0.4247787610619469, "grad_norm": 0.043856628239154816, "learning_rate": 0.000298661623223217, "loss": 11.7554, "step": 12 }, { "epoch": 0.46017699115044247, "grad_norm": 0.055923618376255035, "learning_rate": 0.000296994255755488, "loss": 11.7555, "step": 13 }, { "epoch": 0.49557522123893805, "grad_norm": 0.07664037495851517, "learning_rate": 0.00029467037625815644, "loss": 11.7647, "step": 14 }, { "epoch": 0.5309734513274337, "grad_norm": 0.04599139839410782, "learning_rate": 0.00029170036374118777, "loss": 11.7596, "step": 15 }, { "epoch": 0.5663716814159292, "grad_norm": 0.04845264554023743, "learning_rate": 0.0002880974830014643, "loss": 11.7567, "step": 16 }, { "epoch": 0.6017699115044248, "grad_norm": 0.04592902958393097, "learning_rate": 0.00028387782537898215, "loss": 11.7575, "step": 17 }, { "epoch": 0.6371681415929203, "grad_norm": 0.06315469741821289, "learning_rate": 0.0002790602368890209, "loss": 11.7521, "step": 18 }, { "epoch": 0.672566371681416, "grad_norm": 0.07420399785041809, "learning_rate": 0.00027366623405126404, "loss": 11.7532, "step": 19 }, { "epoch": 0.7079646017699115, "grad_norm": 0.07503742724657059, "learning_rate": 0.0002677199077917991, "loss": 11.7526, "step": 20 }, { "epoch": 0.7433628318584071, "grad_norm": 0.09958707541227341, "learning_rate": 0.0002612478158471936, "loss": 11.7529, "step": 21 }, { "epoch": 0.7787610619469026, "grad_norm": 0.06647299975156784, "learning_rate": 0.0002542788641511963, "loss": 11.7575, "step": 22 }, { "epoch": 0.8141592920353983, "grad_norm": 0.07075267285108566, "learning_rate": 0.0002468441777338203, "loss": 11.7543, "step": 23 }, { "epoch": 0.8495575221238938, "grad_norm": 0.0799153745174408, "learning_rate": 0.00023897696170940326, "loss": 11.7512, "step": 24 }, { "epoch": 0.8849557522123894, "grad_norm": 0.0752144306898117, "learning_rate": 0.00023071235297450588, "loss": 11.752, "step": 25 }, { "epoch": 0.8849557522123894, "eval_loss": 11.748730659484863, "eval_runtime": 0.0544, "eval_samples_per_second": 918.418, "eval_steps_per_second": 36.737, "step": 25 }, { "epoch": 0.9203539823008849, "grad_norm": 0.09072871506214142, "learning_rate": 0.00022208726327800255, "loss": 11.7499, "step": 26 }, { "epoch": 0.9557522123893806, "grad_norm": 0.10960599780082703, "learning_rate": 0.00021314021436425024, "loss": 11.7441, "step": 27 }, { "epoch": 0.9911504424778761, "grad_norm": 0.12274184077978134, "learning_rate": 0.0002039111659256269, "loss": 11.7448, "step": 28 }, { "epoch": 1.0265486725663717, "grad_norm": 0.1580979973077774, "learning_rate": 0.0001944413371328451, "loss": 20.9127, "step": 29 }, { "epoch": 1.0619469026548674, "grad_norm": 0.0814850702881813, "learning_rate": 0.00018477302254012924, "loss": 11.5984, "step": 30 }, { "epoch": 1.0973451327433628, "grad_norm": 0.0942695289850235, "learning_rate": 0.0001749494031874695, "loss": 11.8138, "step": 31 }, { "epoch": 1.1327433628318584, "grad_norm": 0.09790118783712387, "learning_rate": 0.00016501435374361475, "loss": 11.7438, "step": 32 }, { "epoch": 1.168141592920354, "grad_norm": 0.1221451684832573, "learning_rate": 0.00015501224655115118, "loss": 11.5821, "step": 33 }, { "epoch": 1.2035398230088497, "grad_norm": 0.10308799147605896, "learning_rate": 0.00014498775344884884, "loss": 11.9058, "step": 34 }, { "epoch": 1.238938053097345, "grad_norm": 0.1409374475479126, "learning_rate": 0.00013498564625638522, "loss": 12.3076, "step": 35 }, { "epoch": 1.2743362831858407, "grad_norm": 0.10012196749448776, "learning_rate": 0.0001250505968125305, "loss": 11.4403, "step": 36 }, { "epoch": 1.3097345132743363, "grad_norm": 0.10341328382492065, "learning_rate": 0.00011522697745987075, "loss": 11.4606, "step": 37 }, { "epoch": 1.3451327433628317, "grad_norm": 0.0907343178987503, "learning_rate": 0.0001055586628671549, "loss": 11.6055, "step": 38 }, { "epoch": 1.3805309734513274, "grad_norm": 0.08441987633705139, "learning_rate": 9.608883407437309e-05, "loss": 11.7105, "step": 39 }, { "epoch": 1.415929203539823, "grad_norm": 0.10750183463096619, "learning_rate": 8.685978563574976e-05, "loss": 12.0368, "step": 40 }, { "epoch": 1.4513274336283186, "grad_norm": 0.1244698315858841, "learning_rate": 7.791273672199742e-05, "loss": 11.7444, "step": 41 }, { "epoch": 1.4867256637168142, "grad_norm": 0.17445337772369385, "learning_rate": 6.92876470254941e-05, "loss": 12.5033, "step": 42 }, { "epoch": 1.5221238938053099, "grad_norm": 0.10217167437076569, "learning_rate": 6.1023038290596715e-05, "loss": 11.0555, "step": 43 }, { "epoch": 1.5575221238938053, "grad_norm": 0.09945648908615112, "learning_rate": 5.315582226617963e-05, "loss": 11.5445, "step": 44 }, { "epoch": 1.592920353982301, "grad_norm": 0.08290676027536392, "learning_rate": 4.5721135848803653e-05, "loss": 11.5874, "step": 45 }, { "epoch": 1.6283185840707963, "grad_norm": 0.10276991873979568, "learning_rate": 3.875218415280636e-05, "loss": 11.7448, "step": 46 }, { "epoch": 1.663716814159292, "grad_norm": 0.10032013803720474, "learning_rate": 3.228009220820085e-05, "loss": 11.8132, "step": 47 }, { "epoch": 1.6991150442477876, "grad_norm": 0.10768602788448334, "learning_rate": 2.6333765948735986e-05, "loss": 11.9151, "step": 48 }, { "epoch": 1.7345132743362832, "grad_norm": 0.14142917096614838, "learning_rate": 2.0939763110979125e-05, "loss": 12.6256, "step": 49 }, { "epoch": 1.7699115044247788, "grad_norm": 0.08887699246406555, "learning_rate": 1.612217462101783e-05, "loss": 10.8969, "step": 50 }, { "epoch": 1.7699115044247788, "eval_loss": 11.734221458435059, "eval_runtime": 0.0543, "eval_samples_per_second": 920.765, "eval_steps_per_second": 36.831, "step": 50 }, { "epoch": 1.8053097345132745, "grad_norm": 0.10223124921321869, "learning_rate": 1.1902516998535666e-05, "loss": 11.5613, "step": 51 }, { "epoch": 1.8407079646017699, "grad_norm": 0.08183666318655014, "learning_rate": 8.299636258812197e-06, "loss": 11.7158, "step": 52 }, { "epoch": 1.8761061946902655, "grad_norm": 0.09326402842998505, "learning_rate": 5.329623741843531e-06, "loss": 11.7962, "step": 53 }, { "epoch": 1.911504424778761, "grad_norm": 0.10565122961997986, "learning_rate": 3.0057442445119872e-06, "loss": 11.7664, "step": 54 }, { "epoch": 1.9469026548672566, "grad_norm": 0.11125579476356506, "learning_rate": 1.3383767767829956e-06, "loss": 11.8781, "step": 55 }, { "epoch": 1.9823008849557522, "grad_norm": 0.13728636503219604, "learning_rate": 3.349682065270254e-07, "loss": 12.3971, "step": 56 }, { "epoch": 2.017699115044248, "grad_norm": 0.16288495063781738, "learning_rate": 0.0, "loss": 20.1629, "step": 57 } ], "logging_steps": 1, "max_steps": 57, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 98692671209472.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }