{ "best_metric": 0.2623326778411865, "best_model_checkpoint": "miner_id_24/checkpoint-25", "epoch": 0.3714020427112349, "eval_steps": 25, "global_step": 25, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.014856081708449397, "grad_norm": 12.877364158630371, "learning_rate": 5e-05, "loss": 2.5011, "step": 1 }, { "epoch": 0.014856081708449397, "eval_loss": 2.4801716804504395, "eval_runtime": 3.1845, "eval_samples_per_second": 15.701, "eval_steps_per_second": 4.082, "step": 1 }, { "epoch": 0.029712163416898793, "grad_norm": 13.52737045288086, "learning_rate": 0.0001, "loss": 2.497, "step": 2 }, { "epoch": 0.04456824512534819, "grad_norm": 11.447822570800781, "learning_rate": 9.958086757163489e-05, "loss": 1.6155, "step": 3 }, { "epoch": 0.059424326833797586, "grad_norm": 4.801721096038818, "learning_rate": 9.833127793065098e-05, "loss": 0.4131, "step": 4 }, { "epoch": 0.07428040854224698, "grad_norm": 6.5657267570495605, "learning_rate": 9.627450856774539e-05, "loss": 0.374, "step": 5 }, { "epoch": 0.08913649025069638, "grad_norm": 1.8930745124816895, "learning_rate": 9.3448873204592e-05, "loss": 0.2925, "step": 6 }, { "epoch": 0.10399257195914577, "grad_norm": 2.2727997303009033, "learning_rate": 8.990700808169889e-05, "loss": 0.3141, "step": 7 }, { "epoch": 0.11884865366759517, "grad_norm": 0.4426746070384979, "learning_rate": 8.571489144483944e-05, "loss": 0.2571, "step": 8 }, { "epoch": 0.13370473537604458, "grad_norm": 1.8637906312942505, "learning_rate": 8.095061449516903e-05, "loss": 0.2943, "step": 9 }, { "epoch": 0.14856081708449395, "grad_norm": 0.5470730066299438, "learning_rate": 7.570292669790186e-05, "loss": 0.2635, "step": 10 }, { "epoch": 0.16341689879294335, "grad_norm": 0.6328050494194031, "learning_rate": 7.006958254769438e-05, "loss": 0.2608, "step": 11 }, { "epoch": 0.17827298050139276, "grad_norm": 1.073302984237671, "learning_rate": 6.415552058736854e-05, "loss": 0.3126, "step": 12 }, { "epoch": 0.19312906220984216, "grad_norm": 0.916024386882782, "learning_rate": 5.80709086014102e-05, "loss": 0.2678, "step": 13 }, { "epoch": 0.20798514391829154, "grad_norm": 0.5357609391212463, "learning_rate": 5.192909139858981e-05, "loss": 0.2662, "step": 14 }, { "epoch": 0.22284122562674094, "grad_norm": 0.5085605978965759, "learning_rate": 4.584447941263149e-05, "loss": 0.266, "step": 15 }, { "epoch": 0.23769730733519034, "grad_norm": 0.2650889456272125, "learning_rate": 3.9930417452305626e-05, "loss": 0.2618, "step": 16 }, { "epoch": 0.2525533890436397, "grad_norm": 0.7138887643814087, "learning_rate": 3.4297073302098156e-05, "loss": 0.2805, "step": 17 }, { "epoch": 0.26740947075208915, "grad_norm": 0.6649295687675476, "learning_rate": 2.9049385504830985e-05, "loss": 0.2886, "step": 18 }, { "epoch": 0.2822655524605385, "grad_norm": 0.572088360786438, "learning_rate": 2.4285108555160577e-05, "loss": 0.2651, "step": 19 }, { "epoch": 0.2971216341689879, "grad_norm": 0.5603142380714417, "learning_rate": 2.0092991918301108e-05, "loss": 0.2658, "step": 20 }, { "epoch": 0.31197771587743733, "grad_norm": 0.5217267870903015, "learning_rate": 1.6551126795408016e-05, "loss": 0.2673, "step": 21 }, { "epoch": 0.3268337975858867, "grad_norm": 0.8165284395217896, "learning_rate": 1.3725491432254624e-05, "loss": 0.2576, "step": 22 }, { "epoch": 0.34168987929433614, "grad_norm": 0.42604365944862366, "learning_rate": 1.1668722069349041e-05, "loss": 0.2482, "step": 23 }, { "epoch": 0.3565459610027855, "grad_norm": 0.24891525506973267, "learning_rate": 1.0419132428365116e-05, "loss": 0.2606, "step": 24 }, { "epoch": 0.3714020427112349, "grad_norm": 0.43413084745407104, "learning_rate": 1e-05, "loss": 0.2612, "step": 25 }, { "epoch": 0.3714020427112349, "eval_loss": 0.2623326778411865, "eval_runtime": 3.2444, "eval_samples_per_second": 15.411, "eval_steps_per_second": 4.007, "step": 25 } ], "logging_steps": 1, "max_steps": 25, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 25, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.984041808658432e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }