{ "best_metric": 9.151042938232422, "best_model_checkpoint": "miner_id_24/checkpoint-25", "epoch": 0.06575702778234424, "eval_steps": 25, "global_step": 25, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0026302811112937697, "grad_norm": 4.166772365570068, "learning_rate": 5e-05, "loss": 10.4191, "step": 1 }, { "epoch": 0.0026302811112937697, "eval_loss": 10.206449508666992, "eval_runtime": 0.2492, "eval_samples_per_second": 200.666, "eval_steps_per_second": 52.173, "step": 1 }, { "epoch": 0.005260562222587539, "grad_norm": 3.9618587493896484, "learning_rate": 0.0001, "loss": 10.2675, "step": 2 }, { "epoch": 0.007890843333881308, "grad_norm": 4.077890872955322, "learning_rate": 9.958086757163489e-05, "loss": 10.2374, "step": 3 }, { "epoch": 0.010521124445175079, "grad_norm": 4.005270957946777, "learning_rate": 9.833127793065098e-05, "loss": 10.141, "step": 4 }, { "epoch": 0.013151405556468848, "grad_norm": 3.936140775680542, "learning_rate": 9.627450856774539e-05, "loss": 10.0376, "step": 5 }, { "epoch": 0.015781686667762616, "grad_norm": 3.7704663276672363, "learning_rate": 9.3448873204592e-05, "loss": 9.9487, "step": 6 }, { "epoch": 0.018411967779056387, "grad_norm": 3.607231855392456, "learning_rate": 8.990700808169889e-05, "loss": 9.8131, "step": 7 }, { "epoch": 0.021042248890350158, "grad_norm": 3.440260648727417, "learning_rate": 8.571489144483944e-05, "loss": 9.8168, "step": 8 }, { "epoch": 0.023672530001643925, "grad_norm": 3.4772696495056152, "learning_rate": 8.095061449516903e-05, "loss": 9.6004, "step": 9 }, { "epoch": 0.026302811112937696, "grad_norm": 3.6518537998199463, "learning_rate": 7.570292669790186e-05, "loss": 9.4994, "step": 10 }, { "epoch": 0.028933092224231464, "grad_norm": 4.118347644805908, "learning_rate": 7.006958254769438e-05, "loss": 9.3983, "step": 11 }, { "epoch": 0.03156337333552523, "grad_norm": 4.641587257385254, "learning_rate": 6.415552058736854e-05, "loss": 9.2531, "step": 12 }, { "epoch": 0.034193654446819, "grad_norm": 3.165553092956543, "learning_rate": 5.80709086014102e-05, "loss": 9.49, "step": 13 }, { "epoch": 0.03682393555811277, "grad_norm": 3.1877760887145996, "learning_rate": 5.192909139858981e-05, "loss": 9.4558, "step": 14 }, { "epoch": 0.039454216669406544, "grad_norm": 3.2752976417541504, "learning_rate": 4.584447941263149e-05, "loss": 9.4161, "step": 15 }, { "epoch": 0.042084497780700315, "grad_norm": 3.2027299404144287, "learning_rate": 3.9930417452305626e-05, "loss": 9.3854, "step": 16 }, { "epoch": 0.04471477889199408, "grad_norm": 3.0393407344818115, "learning_rate": 3.4297073302098156e-05, "loss": 9.421, "step": 17 }, { "epoch": 0.04734506000328785, "grad_norm": 2.993825674057007, "learning_rate": 2.9049385504830985e-05, "loss": 9.3122, "step": 18 }, { "epoch": 0.04997534111458162, "grad_norm": 3.0169641971588135, "learning_rate": 2.4285108555160577e-05, "loss": 9.2748, "step": 19 }, { "epoch": 0.05260562222587539, "grad_norm": 3.0002386569976807, "learning_rate": 2.0092991918301108e-05, "loss": 9.2551, "step": 20 }, { "epoch": 0.05523590333716916, "grad_norm": 3.03594708442688, "learning_rate": 1.6551126795408016e-05, "loss": 9.1614, "step": 21 }, { "epoch": 0.05786618444846293, "grad_norm": 3.229689836502075, "learning_rate": 1.3725491432254624e-05, "loss": 9.0535, "step": 22 }, { "epoch": 0.0604964655597567, "grad_norm": 3.7371973991394043, "learning_rate": 1.1668722069349041e-05, "loss": 9.0129, "step": 23 }, { "epoch": 0.06312674667105046, "grad_norm": 4.012978553771973, "learning_rate": 1.0419132428365116e-05, "loss": 8.9554, "step": 24 }, { "epoch": 0.06575702778234424, "grad_norm": 5.139892101287842, "learning_rate": 1e-05, "loss": 8.7045, "step": 25 }, { "epoch": 0.06575702778234424, "eval_loss": 9.151042938232422, "eval_runtime": 0.2436, "eval_samples_per_second": 205.267, "eval_steps_per_second": 53.37, "step": 25 } ], "logging_steps": 1, "max_steps": 25, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 25, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 116006898892800.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }