{ "best_metric": 0.15553146600723267, "best_model_checkpoint": "miner_id_24/checkpoint-25", "epoch": 0.017752529735487306, "eval_steps": 5, "global_step": 25, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0007101011894194922, "grad_norm": 0.7088547945022583, "learning_rate": 2e-05, "loss": 0.4879, "step": 1 }, { "epoch": 0.0007101011894194922, "eval_loss": 0.48425838351249695, "eval_runtime": 174.3603, "eval_samples_per_second": 3.401, "eval_steps_per_second": 1.703, "step": 1 }, { "epoch": 0.0014202023788389845, "grad_norm": 0.6956668496131897, "learning_rate": 4e-05, "loss": 0.4399, "step": 2 }, { "epoch": 0.002130303568258477, "grad_norm": 0.7884685397148132, "learning_rate": 6e-05, "loss": 0.4986, "step": 3 }, { "epoch": 0.002840404757677969, "grad_norm": 0.652370274066925, "learning_rate": 8e-05, "loss": 0.4597, "step": 4 }, { "epoch": 0.0035505059470974615, "grad_norm": 0.5573405623435974, "learning_rate": 0.0001, "loss": 0.4164, "step": 5 }, { "epoch": 0.0035505059470974615, "eval_loss": 0.3724066913127899, "eval_runtime": 174.2204, "eval_samples_per_second": 3.404, "eval_steps_per_second": 1.705, "step": 5 }, { "epoch": 0.004260607136516954, "grad_norm": 0.4771481454372406, "learning_rate": 0.00012, "loss": 0.3851, "step": 6 }, { "epoch": 0.004970708325936446, "grad_norm": 0.559291660785675, "learning_rate": 0.00014, "loss": 0.3149, "step": 7 }, { "epoch": 0.005680809515355938, "grad_norm": 0.4687493145465851, "learning_rate": 0.00016, "loss": 0.3091, "step": 8 }, { "epoch": 0.00639091070477543, "grad_norm": 0.4188617467880249, "learning_rate": 0.00018, "loss": 0.2673, "step": 9 }, { "epoch": 0.007101011894194923, "grad_norm": 0.38055554032325745, "learning_rate": 0.0002, "loss": 0.2534, "step": 10 }, { "epoch": 0.007101011894194923, "eval_loss": 0.230683833360672, "eval_runtime": 174.0967, "eval_samples_per_second": 3.406, "eval_steps_per_second": 1.706, "step": 10 }, { "epoch": 0.0078111130836144155, "grad_norm": 0.37593263387680054, "learning_rate": 0.00019781476007338058, "loss": 0.2421, "step": 11 }, { "epoch": 0.008521214273033907, "grad_norm": 0.39975905418395996, "learning_rate": 0.0001913545457642601, "loss": 0.2147, "step": 12 }, { "epoch": 0.009231315462453399, "grad_norm": 0.3451046347618103, "learning_rate": 0.00018090169943749476, "loss": 0.2199, "step": 13 }, { "epoch": 0.009941416651872892, "grad_norm": 0.30507731437683105, "learning_rate": 0.00016691306063588583, "loss": 0.1502, "step": 14 }, { "epoch": 0.010651517841292384, "grad_norm": 0.3645358979701996, "learning_rate": 0.00015000000000000001, "loss": 0.1933, "step": 15 }, { "epoch": 0.010651517841292384, "eval_loss": 0.17332763969898224, "eval_runtime": 173.1726, "eval_samples_per_second": 3.424, "eval_steps_per_second": 1.715, "step": 15 }, { "epoch": 0.011361619030711876, "grad_norm": 0.29906368255615234, "learning_rate": 0.00013090169943749476, "loss": 0.1612, "step": 16 }, { "epoch": 0.012071720220131369, "grad_norm": 0.31010934710502625, "learning_rate": 0.00011045284632676536, "loss": 0.1729, "step": 17 }, { "epoch": 0.01278182140955086, "grad_norm": 0.29333606362342834, "learning_rate": 8.954715367323468e-05, "loss": 0.1675, "step": 18 }, { "epoch": 0.013491922598970352, "grad_norm": 0.3287576735019684, "learning_rate": 6.909830056250527e-05, "loss": 0.1723, "step": 19 }, { "epoch": 0.014202023788389846, "grad_norm": 0.26639440655708313, "learning_rate": 5.000000000000002e-05, "loss": 0.1551, "step": 20 }, { "epoch": 0.014202023788389846, "eval_loss": 0.15883538126945496, "eval_runtime": 172.9527, "eval_samples_per_second": 3.429, "eval_steps_per_second": 1.717, "step": 20 }, { "epoch": 0.014912124977809338, "grad_norm": 0.29583197832107544, "learning_rate": 3.308693936411421e-05, "loss": 0.1597, "step": 21 }, { "epoch": 0.015622226167228831, "grad_norm": 0.20675452053546906, "learning_rate": 1.9098300562505266e-05, "loss": 0.1089, "step": 22 }, { "epoch": 0.016332327356648323, "grad_norm": 0.2454639971256256, "learning_rate": 8.645454235739903e-06, "loss": 0.1296, "step": 23 }, { "epoch": 0.017042428546067814, "grad_norm": 0.2648043930530548, "learning_rate": 2.1852399266194314e-06, "loss": 0.1249, "step": 24 }, { "epoch": 0.017752529735487306, "grad_norm": 0.22896485030651093, "learning_rate": 0.0, "loss": 0.1387, "step": 25 }, { "epoch": 0.017752529735487306, "eval_loss": 0.15553146600723267, "eval_runtime": 172.9969, "eval_samples_per_second": 3.428, "eval_steps_per_second": 1.717, "step": 25 } ], "logging_steps": 1, "max_steps": 25, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 2, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8876602394935296.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }