{ "best_metric": 0.8565148115158081, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 1.0256410256410255, "eval_steps": 25, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.020512820512820513, "grad_norm": 86.62631225585938, "learning_rate": 2e-05, "loss": 70.1471, "step": 1 }, { "epoch": 0.020512820512820513, "eval_loss": 1.1606084108352661, "eval_runtime": 2.4761, "eval_samples_per_second": 20.193, "eval_steps_per_second": 5.25, "step": 1 }, { "epoch": 0.041025641025641026, "grad_norm": 169.72178649902344, "learning_rate": 4e-05, "loss": 76.1978, "step": 2 }, { "epoch": 0.06153846153846154, "grad_norm": 224.21322631835938, "learning_rate": 6e-05, "loss": 77.2054, "step": 3 }, { "epoch": 0.08205128205128205, "grad_norm": 117.42841339111328, "learning_rate": 8e-05, "loss": 67.7335, "step": 4 }, { "epoch": 0.10256410256410256, "grad_norm": 99.99711608886719, "learning_rate": 0.0001, "loss": 61.9343, "step": 5 }, { "epoch": 0.12307692307692308, "grad_norm": 80.5895004272461, "learning_rate": 9.998898745083595e-05, "loss": 58.4254, "step": 6 }, { "epoch": 0.14358974358974358, "grad_norm": 98.62382507324219, "learning_rate": 9.995595519339882e-05, "loss": 58.7728, "step": 7 }, { "epoch": 0.1641025641025641, "grad_norm": 77.48738098144531, "learning_rate": 9.990091939521571e-05, "loss": 59.8276, "step": 8 }, { "epoch": 0.18461538461538463, "grad_norm": 98.08352661132812, "learning_rate": 9.982390699337252e-05, "loss": 63.9527, "step": 9 }, { "epoch": 0.20512820512820512, "grad_norm": 89.5567626953125, "learning_rate": 9.972495568132979e-05, "loss": 65.547, "step": 10 }, { "epoch": 0.22564102564102564, "grad_norm": 94.18596649169922, "learning_rate": 9.960411389047366e-05, "loss": 68.6032, "step": 11 }, { "epoch": 0.24615384615384617, "grad_norm": 161.9555206298828, "learning_rate": 9.946144076641138e-05, "loss": 67.0602, "step": 12 }, { "epoch": 0.26666666666666666, "grad_norm": 64.71723175048828, "learning_rate": 9.929700614002265e-05, "loss": 65.1495, "step": 13 }, { "epoch": 0.28717948717948716, "grad_norm": 71.39453887939453, "learning_rate": 9.911089049328113e-05, "loss": 65.5682, "step": 14 }, { "epoch": 0.3076923076923077, "grad_norm": 88.2963638305664, "learning_rate": 9.890318491986281e-05, "loss": 62.6678, "step": 15 }, { "epoch": 0.3282051282051282, "grad_norm": 74.54248809814453, "learning_rate": 9.867399108056054e-05, "loss": 57.3027, "step": 16 }, { "epoch": 0.3487179487179487, "grad_norm": 57.77771759033203, "learning_rate": 9.842342115352646e-05, "loss": 54.3357, "step": 17 }, { "epoch": 0.36923076923076925, "grad_norm": 46.135955810546875, "learning_rate": 9.815159777936681e-05, "loss": 55.2862, "step": 18 }, { "epoch": 0.38974358974358975, "grad_norm": 51.60411071777344, "learning_rate": 9.785865400111593e-05, "loss": 55.9271, "step": 19 }, { "epoch": 0.41025641025641024, "grad_norm": 49.10125732421875, "learning_rate": 9.754473319911866e-05, "loss": 53.1128, "step": 20 }, { "epoch": 0.4307692307692308, "grad_norm": 59.726318359375, "learning_rate": 9.720998902085355e-05, "loss": 57.8446, "step": 21 }, { "epoch": 0.4512820512820513, "grad_norm": 83.58087921142578, "learning_rate": 9.685458530573033e-05, "loss": 58.1853, "step": 22 }, { "epoch": 0.4717948717948718, "grad_norm": 75.08509826660156, "learning_rate": 9.647869600489953e-05, "loss": 61.0301, "step": 23 }, { "epoch": 0.49230769230769234, "grad_norm": 109.88655090332031, "learning_rate": 9.60825050961125e-05, "loss": 65.8346, "step": 24 }, { "epoch": 0.5128205128205128, "grad_norm": 371.794921875, "learning_rate": 9.566620649367419e-05, "loss": 65.1721, "step": 25 }, { "epoch": 0.5128205128205128, "eval_loss": 0.9023745059967041, "eval_runtime": 2.4751, "eval_samples_per_second": 20.201, "eval_steps_per_second": 5.252, "step": 25 }, { "epoch": 0.5333333333333333, "grad_norm": 72.81143188476562, "learning_rate": 9.523000395353242e-05, "loss": 61.1996, "step": 26 }, { "epoch": 0.5538461538461539, "grad_norm": 79.45490264892578, "learning_rate": 9.477411097355024e-05, "loss": 55.9212, "step": 27 }, { "epoch": 0.5743589743589743, "grad_norm": 60.01797866821289, "learning_rate": 9.429875068901018e-05, "loss": 54.4924, "step": 28 }, { "epoch": 0.5948717948717949, "grad_norm": 58.746456146240234, "learning_rate": 9.380415576340125e-05, "loss": 53.7485, "step": 29 }, { "epoch": 0.6153846153846154, "grad_norm": 41.1815185546875, "learning_rate": 9.329056827454279e-05, "loss": 53.355, "step": 30 }, { "epoch": 0.6358974358974359, "grad_norm": 41.690147399902344, "learning_rate": 9.27582395961002e-05, "loss": 53.0229, "step": 31 }, { "epoch": 0.6564102564102564, "grad_norm": 37.58351135253906, "learning_rate": 9.22074302745509e-05, "loss": 52.7548, "step": 32 }, { "epoch": 0.676923076923077, "grad_norm": 76.97421264648438, "learning_rate": 9.163840990166085e-05, "loss": 56.9977, "step": 33 }, { "epoch": 0.6974358974358974, "grad_norm": 74.90629577636719, "learning_rate": 9.105145698253366e-05, "loss": 57.3899, "step": 34 }, { "epoch": 0.717948717948718, "grad_norm": 64.42181396484375, "learning_rate": 9.044685879929732e-05, "loss": 57.688, "step": 35 }, { "epoch": 0.7384615384615385, "grad_norm": 100.76221466064453, "learning_rate": 8.98249112704948e-05, "loss": 64.6104, "step": 36 }, { "epoch": 0.7589743589743589, "grad_norm": 34.374786376953125, "learning_rate": 8.918591880624782e-05, "loss": 61.4561, "step": 37 }, { "epoch": 0.7794871794871795, "grad_norm": 56.67704391479492, "learning_rate": 8.853019415926429e-05, "loss": 60.2719, "step": 38 }, { "epoch": 0.8, "grad_norm": 63.6683464050293, "learning_rate": 8.785805827176255e-05, "loss": 52.9461, "step": 39 }, { "epoch": 0.8205128205128205, "grad_norm": 52.915809631347656, "learning_rate": 8.716984011838731e-05, "loss": 54.199, "step": 40 }, { "epoch": 0.841025641025641, "grad_norm": 36.469722747802734, "learning_rate": 8.646587654519414e-05, "loss": 50.3218, "step": 41 }, { "epoch": 0.8615384615384616, "grad_norm": 51.19331741333008, "learning_rate": 8.57465121047812e-05, "loss": 51.637, "step": 42 }, { "epoch": 0.882051282051282, "grad_norm": 35.982391357421875, "learning_rate": 8.501209888764928e-05, "loss": 51.5347, "step": 43 }, { "epoch": 0.9025641025641026, "grad_norm": 54.286808013916016, "learning_rate": 8.426299634987217e-05, "loss": 52.7197, "step": 44 }, { "epoch": 0.9230769230769231, "grad_norm": 43.39719772338867, "learning_rate": 8.349957113716212e-05, "loss": 52.1823, "step": 45 }, { "epoch": 0.9435897435897436, "grad_norm": 46.373233795166016, "learning_rate": 8.27221969054164e-05, "loss": 54.4859, "step": 46 }, { "epoch": 0.9641025641025641, "grad_norm": 61.711978912353516, "learning_rate": 8.193125413783259e-05, "loss": 61.241, "step": 47 }, { "epoch": 0.9846153846153847, "grad_norm": 74.2352523803711, "learning_rate": 8.112712995868243e-05, "loss": 60.6412, "step": 48 }, { "epoch": 1.005128205128205, "grad_norm": 42.651710510253906, "learning_rate": 8.031021794383513e-05, "loss": 54.8344, "step": 49 }, { "epoch": 1.0256410256410255, "grad_norm": 65.62483215332031, "learning_rate": 7.948091792812296e-05, "loss": 58.6391, "step": 50 }, { "epoch": 1.0256410256410255, "eval_loss": 0.8565148115158081, "eval_runtime": 2.4768, "eval_samples_per_second": 20.187, "eval_steps_per_second": 5.249, "step": 50 } ], "logging_steps": 1, "max_steps": 147, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.198420290049147e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }