|
{ |
|
"best_metric": 11.734221458435059, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-50", |
|
"epoch": 2.017699115044248, |
|
"eval_steps": 25, |
|
"global_step": 57, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.035398230088495575, |
|
"grad_norm": 0.031247906386852264, |
|
"learning_rate": 2.9999999999999997e-05, |
|
"loss": 11.7618, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.035398230088495575, |
|
"eval_loss": 11.758525848388672, |
|
"eval_runtime": 0.0557, |
|
"eval_samples_per_second": 897.074, |
|
"eval_steps_per_second": 35.883, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.07079646017699115, |
|
"grad_norm": 0.033341653645038605, |
|
"learning_rate": 5.9999999999999995e-05, |
|
"loss": 11.7604, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.10619469026548672, |
|
"grad_norm": 0.03511160612106323, |
|
"learning_rate": 8.999999999999999e-05, |
|
"loss": 11.7606, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.1415929203539823, |
|
"grad_norm": 0.04077626019716263, |
|
"learning_rate": 0.00011999999999999999, |
|
"loss": 11.7602, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.17699115044247787, |
|
"grad_norm": 0.04330646991729736, |
|
"learning_rate": 0.00015, |
|
"loss": 11.7584, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.21238938053097345, |
|
"grad_norm": 0.043747056275606155, |
|
"learning_rate": 0.00017999999999999998, |
|
"loss": 11.7597, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.24778761061946902, |
|
"grad_norm": 0.06513168662786484, |
|
"learning_rate": 0.00020999999999999998, |
|
"loss": 11.7631, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.2831858407079646, |
|
"grad_norm": 0.02857106737792492, |
|
"learning_rate": 0.00023999999999999998, |
|
"loss": 11.7625, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.3185840707964602, |
|
"grad_norm": 0.0340174175798893, |
|
"learning_rate": 0.00027, |
|
"loss": 11.758, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.35398230088495575, |
|
"grad_norm": 0.03516850247979164, |
|
"learning_rate": 0.0003, |
|
"loss": 11.7586, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.3893805309734513, |
|
"grad_norm": 0.04234551265835762, |
|
"learning_rate": 0.000299665031793473, |
|
"loss": 11.7581, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.4247787610619469, |
|
"grad_norm": 0.043856628239154816, |
|
"learning_rate": 0.000298661623223217, |
|
"loss": 11.7554, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.46017699115044247, |
|
"grad_norm": 0.055923618376255035, |
|
"learning_rate": 0.000296994255755488, |
|
"loss": 11.7555, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.49557522123893805, |
|
"grad_norm": 0.07664037495851517, |
|
"learning_rate": 0.00029467037625815644, |
|
"loss": 11.7647, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.5309734513274337, |
|
"grad_norm": 0.04599139839410782, |
|
"learning_rate": 0.00029170036374118777, |
|
"loss": 11.7596, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.5663716814159292, |
|
"grad_norm": 0.04845264554023743, |
|
"learning_rate": 0.0002880974830014643, |
|
"loss": 11.7567, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.6017699115044248, |
|
"grad_norm": 0.04592902958393097, |
|
"learning_rate": 0.00028387782537898215, |
|
"loss": 11.7575, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.6371681415929203, |
|
"grad_norm": 0.06315469741821289, |
|
"learning_rate": 0.0002790602368890209, |
|
"loss": 11.7521, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.672566371681416, |
|
"grad_norm": 0.07420399785041809, |
|
"learning_rate": 0.00027366623405126404, |
|
"loss": 11.7532, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.7079646017699115, |
|
"grad_norm": 0.07503742724657059, |
|
"learning_rate": 0.0002677199077917991, |
|
"loss": 11.7526, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.7433628318584071, |
|
"grad_norm": 0.09958707541227341, |
|
"learning_rate": 0.0002612478158471936, |
|
"loss": 11.7529, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.7787610619469026, |
|
"grad_norm": 0.06647299975156784, |
|
"learning_rate": 0.0002542788641511963, |
|
"loss": 11.7575, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.8141592920353983, |
|
"grad_norm": 0.07075267285108566, |
|
"learning_rate": 0.0002468441777338203, |
|
"loss": 11.7543, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.8495575221238938, |
|
"grad_norm": 0.0799153745174408, |
|
"learning_rate": 0.00023897696170940326, |
|
"loss": 11.7512, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.8849557522123894, |
|
"grad_norm": 0.0752144306898117, |
|
"learning_rate": 0.00023071235297450588, |
|
"loss": 11.752, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.8849557522123894, |
|
"eval_loss": 11.748730659484863, |
|
"eval_runtime": 0.0544, |
|
"eval_samples_per_second": 918.418, |
|
"eval_steps_per_second": 36.737, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.9203539823008849, |
|
"grad_norm": 0.09072871506214142, |
|
"learning_rate": 0.00022208726327800255, |
|
"loss": 11.7499, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.9557522123893806, |
|
"grad_norm": 0.10960599780082703, |
|
"learning_rate": 0.00021314021436425024, |
|
"loss": 11.7441, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.9911504424778761, |
|
"grad_norm": 0.12274184077978134, |
|
"learning_rate": 0.0002039111659256269, |
|
"loss": 11.7448, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 1.0265486725663717, |
|
"grad_norm": 0.1580979973077774, |
|
"learning_rate": 0.0001944413371328451, |
|
"loss": 20.9127, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 1.0619469026548674, |
|
"grad_norm": 0.0814850702881813, |
|
"learning_rate": 0.00018477302254012924, |
|
"loss": 11.5984, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.0973451327433628, |
|
"grad_norm": 0.0942695289850235, |
|
"learning_rate": 0.0001749494031874695, |
|
"loss": 11.8138, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 1.1327433628318584, |
|
"grad_norm": 0.09790118783712387, |
|
"learning_rate": 0.00016501435374361475, |
|
"loss": 11.7438, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 1.168141592920354, |
|
"grad_norm": 0.1221451684832573, |
|
"learning_rate": 0.00015501224655115118, |
|
"loss": 11.5821, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 1.2035398230088497, |
|
"grad_norm": 0.10308799147605896, |
|
"learning_rate": 0.00014498775344884884, |
|
"loss": 11.9058, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 1.238938053097345, |
|
"grad_norm": 0.1409374475479126, |
|
"learning_rate": 0.00013498564625638522, |
|
"loss": 12.3076, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 1.2743362831858407, |
|
"grad_norm": 0.10012196749448776, |
|
"learning_rate": 0.0001250505968125305, |
|
"loss": 11.4403, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 1.3097345132743363, |
|
"grad_norm": 0.10341328382492065, |
|
"learning_rate": 0.00011522697745987075, |
|
"loss": 11.4606, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 1.3451327433628317, |
|
"grad_norm": 0.0907343178987503, |
|
"learning_rate": 0.0001055586628671549, |
|
"loss": 11.6055, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 1.3805309734513274, |
|
"grad_norm": 0.08441987633705139, |
|
"learning_rate": 9.608883407437309e-05, |
|
"loss": 11.7105, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 1.415929203539823, |
|
"grad_norm": 0.10750183463096619, |
|
"learning_rate": 8.685978563574976e-05, |
|
"loss": 12.0368, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.4513274336283186, |
|
"grad_norm": 0.1244698315858841, |
|
"learning_rate": 7.791273672199742e-05, |
|
"loss": 11.7444, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 1.4867256637168142, |
|
"grad_norm": 0.17445337772369385, |
|
"learning_rate": 6.92876470254941e-05, |
|
"loss": 12.5033, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 1.5221238938053099, |
|
"grad_norm": 0.10217167437076569, |
|
"learning_rate": 6.1023038290596715e-05, |
|
"loss": 11.0555, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 1.5575221238938053, |
|
"grad_norm": 0.09945648908615112, |
|
"learning_rate": 5.315582226617963e-05, |
|
"loss": 11.5445, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 1.592920353982301, |
|
"grad_norm": 0.08290676027536392, |
|
"learning_rate": 4.5721135848803653e-05, |
|
"loss": 11.5874, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 1.6283185840707963, |
|
"grad_norm": 0.10276991873979568, |
|
"learning_rate": 3.875218415280636e-05, |
|
"loss": 11.7448, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 1.663716814159292, |
|
"grad_norm": 0.10032013803720474, |
|
"learning_rate": 3.228009220820085e-05, |
|
"loss": 11.8132, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 1.6991150442477876, |
|
"grad_norm": 0.10768602788448334, |
|
"learning_rate": 2.6333765948735986e-05, |
|
"loss": 11.9151, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 1.7345132743362832, |
|
"grad_norm": 0.14142917096614838, |
|
"learning_rate": 2.0939763110979125e-05, |
|
"loss": 12.6256, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 1.7699115044247788, |
|
"grad_norm": 0.08887699246406555, |
|
"learning_rate": 1.612217462101783e-05, |
|
"loss": 10.8969, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.7699115044247788, |
|
"eval_loss": 11.734221458435059, |
|
"eval_runtime": 0.0543, |
|
"eval_samples_per_second": 920.765, |
|
"eval_steps_per_second": 36.831, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.8053097345132745, |
|
"grad_norm": 0.10223124921321869, |
|
"learning_rate": 1.1902516998535666e-05, |
|
"loss": 11.5613, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 1.8407079646017699, |
|
"grad_norm": 0.08183666318655014, |
|
"learning_rate": 8.299636258812197e-06, |
|
"loss": 11.7158, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 1.8761061946902655, |
|
"grad_norm": 0.09326402842998505, |
|
"learning_rate": 5.329623741843531e-06, |
|
"loss": 11.7962, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 1.911504424778761, |
|
"grad_norm": 0.10565122961997986, |
|
"learning_rate": 3.0057442445119872e-06, |
|
"loss": 11.7664, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 1.9469026548672566, |
|
"grad_norm": 0.11125579476356506, |
|
"learning_rate": 1.3383767767829956e-06, |
|
"loss": 11.8781, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 1.9823008849557522, |
|
"grad_norm": 0.13728636503219604, |
|
"learning_rate": 3.349682065270254e-07, |
|
"loss": 12.3971, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 2.017699115044248, |
|
"grad_norm": 0.16288495063781738, |
|
"learning_rate": 0.0, |
|
"loss": 20.1629, |
|
"step": 57 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 57, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 1, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 98692671209472.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|