{ "best_metric": 3.0511791706085205, "best_model_checkpoint": "miner_id_24/checkpoint-25", "epoch": 0.0071301247771836, "eval_steps": 5, "global_step": 25, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00028520499108734403, "grad_norm": 0.9442576766014099, "learning_rate": 2e-05, "loss": 3.3049, "step": 1 }, { "epoch": 0.00028520499108734403, "eval_loss": 3.7652459144592285, "eval_runtime": 857.4027, "eval_samples_per_second": 6.888, "eval_steps_per_second": 1.723, "step": 1 }, { "epoch": 0.0005704099821746881, "grad_norm": 0.9913172721862793, "learning_rate": 4e-05, "loss": 3.7418, "step": 2 }, { "epoch": 0.000855614973262032, "grad_norm": 0.9472721815109253, "learning_rate": 6e-05, "loss": 3.7158, "step": 3 }, { "epoch": 0.0011408199643493761, "grad_norm": 1.1189954280853271, "learning_rate": 8e-05, "loss": 3.9813, "step": 4 }, { "epoch": 0.00142602495543672, "grad_norm": 1.040862798690796, "learning_rate": 0.0001, "loss": 3.569, "step": 5 }, { "epoch": 0.00142602495543672, "eval_loss": 3.6927330493927, "eval_runtime": 862.1425, "eval_samples_per_second": 6.85, "eval_steps_per_second": 1.713, "step": 5 }, { "epoch": 0.001711229946524064, "grad_norm": 1.2287300825119019, "learning_rate": 9.938441702975689e-05, "loss": 3.6772, "step": 6 }, { "epoch": 0.0019964349376114083, "grad_norm": 1.2859654426574707, "learning_rate": 9.755282581475769e-05, "loss": 3.6225, "step": 7 }, { "epoch": 0.0022816399286987523, "grad_norm": 1.526451587677002, "learning_rate": 9.45503262094184e-05, "loss": 3.5456, "step": 8 }, { "epoch": 0.0025668449197860962, "grad_norm": 1.2330029010772705, "learning_rate": 9.045084971874738e-05, "loss": 2.9909, "step": 9 }, { "epoch": 0.00285204991087344, "grad_norm": 1.516096591949463, "learning_rate": 8.535533905932738e-05, "loss": 3.4418, "step": 10 }, { "epoch": 0.00285204991087344, "eval_loss": 3.3367996215820312, "eval_runtime": 862.3847, "eval_samples_per_second": 6.848, "eval_steps_per_second": 1.713, "step": 10 }, { "epoch": 0.003137254901960784, "grad_norm": 1.3773342370986938, "learning_rate": 7.938926261462366e-05, "loss": 3.1527, "step": 11 }, { "epoch": 0.003422459893048128, "grad_norm": 1.2679097652435303, "learning_rate": 7.269952498697734e-05, "loss": 3.2337, "step": 12 }, { "epoch": 0.0037076648841354726, "grad_norm": 1.1964778900146484, "learning_rate": 6.545084971874738e-05, "loss": 2.9654, "step": 13 }, { "epoch": 0.0039928698752228165, "grad_norm": 0.9561767578125, "learning_rate": 5.782172325201155e-05, "loss": 2.7436, "step": 14 }, { "epoch": 0.0042780748663101605, "grad_norm": 1.2431445121765137, "learning_rate": 5e-05, "loss": 3.2928, "step": 15 }, { "epoch": 0.0042780748663101605, "eval_loss": 3.126676559448242, "eval_runtime": 859.4772, "eval_samples_per_second": 6.872, "eval_steps_per_second": 1.718, "step": 15 }, { "epoch": 0.0045632798573975045, "grad_norm": 1.0335696935653687, "learning_rate": 4.2178276747988446e-05, "loss": 2.878, "step": 16 }, { "epoch": 0.0048484848484848485, "grad_norm": 0.8544740080833435, "learning_rate": 3.4549150281252636e-05, "loss": 2.7072, "step": 17 }, { "epoch": 0.0051336898395721925, "grad_norm": 0.9683927297592163, "learning_rate": 2.7300475013022663e-05, "loss": 3.0918, "step": 18 }, { "epoch": 0.0054188948306595364, "grad_norm": 1.025421380996704, "learning_rate": 2.061073738537635e-05, "loss": 3.0183, "step": 19 }, { "epoch": 0.00570409982174688, "grad_norm": 0.8975963592529297, "learning_rate": 1.4644660940672627e-05, "loss": 2.9735, "step": 20 }, { "epoch": 0.00570409982174688, "eval_loss": 3.0618274211883545, "eval_runtime": 867.1811, "eval_samples_per_second": 6.811, "eval_steps_per_second": 1.703, "step": 20 }, { "epoch": 0.005989304812834224, "grad_norm": 0.8859240412712097, "learning_rate": 9.549150281252633e-06, "loss": 3.0036, "step": 21 }, { "epoch": 0.006274509803921568, "grad_norm": 0.8403681516647339, "learning_rate": 5.449673790581611e-06, "loss": 2.8981, "step": 22 }, { "epoch": 0.006559714795008912, "grad_norm": 0.9174202680587769, "learning_rate": 2.4471741852423237e-06, "loss": 3.0085, "step": 23 }, { "epoch": 0.006844919786096256, "grad_norm": 1.1602686643600464, "learning_rate": 6.15582970243117e-07, "loss": 3.0675, "step": 24 }, { "epoch": 0.0071301247771836, "grad_norm": 0.8188506960868835, "learning_rate": 0.0, "loss": 2.8007, "step": 25 }, { "epoch": 0.0071301247771836, "eval_loss": 3.0511791706085205, "eval_runtime": 857.788, "eval_samples_per_second": 6.885, "eval_steps_per_second": 1.722, "step": 25 } ], "logging_steps": 1, "max_steps": 25, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 25, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 2, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.26729100951552e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }