{ "best_metric": 1.7028899192810059, "best_model_checkpoint": "miner_id_24/checkpoint-25", "epoch": 0.0019612842489261967, "eval_steps": 5, "global_step": 25, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 7.845136995704788e-05, "grad_norm": 0.9734085202217102, "learning_rate": 2e-05, "loss": 2.1146, "step": 1 }, { "epoch": 7.845136995704788e-05, "eval_loss": 2.1862733364105225, "eval_runtime": 1765.745, "eval_samples_per_second": 3.04, "eval_steps_per_second": 1.52, "step": 1 }, { "epoch": 0.00015690273991409576, "grad_norm": 1.1216297149658203, "learning_rate": 4e-05, "loss": 2.4345, "step": 2 }, { "epoch": 0.00023535410987114363, "grad_norm": 1.157699465751648, "learning_rate": 6e-05, "loss": 2.1212, "step": 3 }, { "epoch": 0.0003138054798281915, "grad_norm": 1.105882167816162, "learning_rate": 8e-05, "loss": 2.0143, "step": 4 }, { "epoch": 0.00039225684978523936, "grad_norm": 0.8526667952537537, "learning_rate": 0.0001, "loss": 2.2834, "step": 5 }, { "epoch": 0.00039225684978523936, "eval_loss": 2.1460585594177246, "eval_runtime": 1768.6661, "eval_samples_per_second": 3.035, "eval_steps_per_second": 1.518, "step": 5 }, { "epoch": 0.00047070821974228727, "grad_norm": 0.9058842062950134, "learning_rate": 0.00012, "loss": 2.1771, "step": 6 }, { "epoch": 0.0005491595896993352, "grad_norm": 0.7835561037063599, "learning_rate": 0.00014, "loss": 2.1572, "step": 7 }, { "epoch": 0.000627610959656383, "grad_norm": 0.7639698386192322, "learning_rate": 0.00016, "loss": 1.8649, "step": 8 }, { "epoch": 0.0007060623296134309, "grad_norm": 0.6497108936309814, "learning_rate": 0.00018, "loss": 2.1549, "step": 9 }, { "epoch": 0.0007845136995704787, "grad_norm": 1.7419626712799072, "learning_rate": 0.0002, "loss": 1.9657, "step": 10 }, { "epoch": 0.0007845136995704787, "eval_loss": 1.9750728607177734, "eval_runtime": 1790.5683, "eval_samples_per_second": 2.998, "eval_steps_per_second": 1.499, "step": 10 }, { "epoch": 0.0008629650695275266, "grad_norm": 0.8262554407119751, "learning_rate": 0.00019781476007338058, "loss": 2.0639, "step": 11 }, { "epoch": 0.0009414164394845745, "grad_norm": 1.002493143081665, "learning_rate": 0.0001913545457642601, "loss": 1.8902, "step": 12 }, { "epoch": 0.0010198678094416223, "grad_norm": 0.8235588073730469, "learning_rate": 0.00018090169943749476, "loss": 1.8743, "step": 13 }, { "epoch": 0.0010983191793986703, "grad_norm": 0.91981041431427, "learning_rate": 0.00016691306063588583, "loss": 1.9061, "step": 14 }, { "epoch": 0.0011767705493557182, "grad_norm": 0.7380278706550598, "learning_rate": 0.00015000000000000001, "loss": 1.9508, "step": 15 }, { "epoch": 0.0011767705493557182, "eval_loss": 1.8117865324020386, "eval_runtime": 1768.5844, "eval_samples_per_second": 3.035, "eval_steps_per_second": 1.518, "step": 15 }, { "epoch": 0.001255221919312766, "grad_norm": 0.757776141166687, "learning_rate": 0.00013090169943749476, "loss": 1.7871, "step": 16 }, { "epoch": 0.001333673289269814, "grad_norm": 0.7072300314903259, "learning_rate": 0.00011045284632676536, "loss": 1.6816, "step": 17 }, { "epoch": 0.0014121246592268617, "grad_norm": 0.7770305871963501, "learning_rate": 8.954715367323468e-05, "loss": 1.566, "step": 18 }, { "epoch": 0.0014905760291839096, "grad_norm": 0.8346571326255798, "learning_rate": 6.909830056250527e-05, "loss": 1.6497, "step": 19 }, { "epoch": 0.0015690273991409574, "grad_norm": 0.7390028238296509, "learning_rate": 5.000000000000002e-05, "loss": 1.8454, "step": 20 }, { "epoch": 0.0015690273991409574, "eval_loss": 1.7211201190948486, "eval_runtime": 1788.6648, "eval_samples_per_second": 3.001, "eval_steps_per_second": 1.501, "step": 20 }, { "epoch": 0.0016474787690980053, "grad_norm": 0.7469911575317383, "learning_rate": 3.308693936411421e-05, "loss": 1.6775, "step": 21 }, { "epoch": 0.0017259301390550531, "grad_norm": 0.8389664888381958, "learning_rate": 1.9098300562505266e-05, "loss": 1.5515, "step": 22 }, { "epoch": 0.0018043815090121012, "grad_norm": 0.7729347348213196, "learning_rate": 8.645454235739903e-06, "loss": 1.6718, "step": 23 }, { "epoch": 0.001882832878969149, "grad_norm": 0.998233437538147, "learning_rate": 2.1852399266194314e-06, "loss": 1.7309, "step": 24 }, { "epoch": 0.0019612842489261967, "grad_norm": 0.9571971297264099, "learning_rate": 0.0, "loss": 1.6594, "step": 25 }, { "epoch": 0.0019612842489261967, "eval_loss": 1.7028899192810059, "eval_runtime": 1794.3519, "eval_samples_per_second": 2.992, "eval_steps_per_second": 1.496, "step": 25 } ], "logging_steps": 1, "max_steps": 25, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 2, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.013847699750912e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }