{ "best_metric": 2.9008495807647705, "best_model_checkpoint": "miner_id_24/checkpoint-25", "epoch": 3.0442477876106193, "eval_steps": 25, "global_step": 43, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07079646017699115, "grad_norm": 0.5193688273429871, "learning_rate": 5e-05, "loss": 3.0404, "step": 1 }, { "epoch": 0.07079646017699115, "eval_loss": 3.3404901027679443, "eval_runtime": 2.4194, "eval_samples_per_second": 39.265, "eval_steps_per_second": 4.96, "step": 1 }, { "epoch": 0.1415929203539823, "grad_norm": 0.8239558935165405, "learning_rate": 0.0001, "loss": 3.2982, "step": 2 }, { "epoch": 0.21238938053097345, "grad_norm": 1.1576820611953735, "learning_rate": 9.985329005918702e-05, "loss": 3.4534, "step": 3 }, { "epoch": 0.2831858407079646, "grad_norm": 0.6357854008674622, "learning_rate": 9.941402118901744e-05, "loss": 3.1246, "step": 4 }, { "epoch": 0.35398230088495575, "grad_norm": 0.41208094358444214, "learning_rate": 9.868477119388896e-05, "loss": 2.9787, "step": 5 }, { "epoch": 0.4247787610619469, "grad_norm": 0.6425259709358215, "learning_rate": 9.766981960274653e-05, "loss": 3.1863, "step": 6 }, { "epoch": 0.49557522123893805, "grad_norm": 0.9896420836448669, "learning_rate": 9.637512255510475e-05, "loss": 3.2937, "step": 7 }, { "epoch": 0.5663716814159292, "grad_norm": 0.4809434115886688, "learning_rate": 9.480827784805278e-05, "loss": 2.9278, "step": 8 }, { "epoch": 0.6371681415929203, "grad_norm": 0.4268536865711212, "learning_rate": 9.297848034936006e-05, "loss": 2.968, "step": 9 }, { "epoch": 0.7079646017699115, "grad_norm": 0.43410423398017883, "learning_rate": 9.089646803833589e-05, "loss": 3.0674, "step": 10 }, { "epoch": 0.7787610619469026, "grad_norm": 0.40264034271240234, "learning_rate": 8.857445899109715e-05, "loss": 2.9144, "step": 11 }, { "epoch": 0.8495575221238938, "grad_norm": 0.3227675259113312, "learning_rate": 8.602607968003935e-05, "loss": 2.86, "step": 12 }, { "epoch": 0.9203539823008849, "grad_norm": 0.3561253547668457, "learning_rate": 8.326628500827826e-05, "loss": 2.9454, "step": 13 }, { "epoch": 0.9911504424778761, "grad_norm": 0.5675299167633057, "learning_rate": 8.03112705483319e-05, "loss": 3.0785, "step": 14 }, { "epoch": 1.0619469026548674, "grad_norm": 0.8450691103935242, "learning_rate": 7.717837750006106e-05, "loss": 5.3565, "step": 15 }, { "epoch": 1.1327433628318584, "grad_norm": 0.4789603650569916, "learning_rate": 7.388599092561315e-05, "loss": 2.8486, "step": 16 }, { "epoch": 1.2035398230088497, "grad_norm": 0.6054747104644775, "learning_rate": 7.045343185856701e-05, "loss": 2.9886, "step": 17 }, { "epoch": 1.2743362831858407, "grad_norm": 0.441631942987442, "learning_rate": 6.690084392042513e-05, "loss": 2.6782, "step": 18 }, { "epoch": 1.3451327433628317, "grad_norm": 0.43877655267715454, "learning_rate": 6.32490751098331e-05, "loss": 2.9849, "step": 19 }, { "epoch": 1.415929203539823, "grad_norm": 0.559032678604126, "learning_rate": 5.951955545823342e-05, "loss": 2.9156, "step": 20 }, { "epoch": 1.4867256637168142, "grad_norm": 0.7596845030784607, "learning_rate": 5.573417126992003e-05, "loss": 3.1224, "step": 21 }, { "epoch": 1.5575221238938053, "grad_norm": 0.4125724136829376, "learning_rate": 5.191513668450178e-05, "loss": 2.7414, "step": 22 }, { "epoch": 1.6283185840707963, "grad_norm": 0.5687753558158875, "learning_rate": 4.8084863315498234e-05, "loss": 2.8063, "step": 23 }, { "epoch": 1.6991150442477876, "grad_norm": 0.6780652403831482, "learning_rate": 4.4265828730079987e-05, "loss": 2.9407, "step": 24 }, { "epoch": 1.7699115044247788, "grad_norm": 0.4667285680770874, "learning_rate": 4.0480444541766576e-05, "loss": 2.6603, "step": 25 }, { "epoch": 1.7699115044247788, "eval_loss": 2.9008495807647705, "eval_runtime": 2.4198, "eval_samples_per_second": 39.26, "eval_steps_per_second": 4.959, "step": 25 }, { "epoch": 1.8407079646017699, "grad_norm": 0.4535748362541199, "learning_rate": 3.675092489016693e-05, "loss": 2.9285, "step": 26 }, { "epoch": 1.911504424778761, "grad_norm": 0.5968184471130371, "learning_rate": 3.309915607957487e-05, "loss": 2.8405, "step": 27 }, { "epoch": 1.9823008849557522, "grad_norm": 0.7783849239349365, "learning_rate": 2.9546568141433006e-05, "loss": 3.0378, "step": 28 }, { "epoch": 2.0530973451327434, "grad_norm": 0.8722625374794006, "learning_rate": 2.6114009074386846e-05, "loss": 5.2923, "step": 29 }, { "epoch": 2.1238938053097347, "grad_norm": 0.5258104801177979, "learning_rate": 2.282162249993895e-05, "loss": 2.7651, "step": 30 }, { "epoch": 2.1946902654867255, "grad_norm": 0.6721611618995667, "learning_rate": 1.9688729451668114e-05, "loss": 2.8586, "step": 31 }, { "epoch": 2.265486725663717, "grad_norm": 0.4610719084739685, "learning_rate": 1.673371499172174e-05, "loss": 2.4631, "step": 32 }, { "epoch": 2.336283185840708, "grad_norm": 0.5183499455451965, "learning_rate": 1.3973920319960655e-05, "loss": 3.2142, "step": 33 }, { "epoch": 2.4070796460176993, "grad_norm": 0.5465492010116577, "learning_rate": 1.1425541008902851e-05, "loss": 2.848, "step": 34 }, { "epoch": 2.47787610619469, "grad_norm": 0.8566042184829712, "learning_rate": 9.103531961664118e-06, "loss": 3.0925, "step": 35 }, { "epoch": 2.5486725663716814, "grad_norm": 0.4418770968914032, "learning_rate": 7.0215196506399515e-06, "loss": 2.7127, "step": 36 }, { "epoch": 2.6194690265486726, "grad_norm": 0.523547351360321, "learning_rate": 5.191722151947226e-06, "loss": 2.6927, "step": 37 }, { "epoch": 2.6902654867256635, "grad_norm": 0.6238358616828918, "learning_rate": 3.6248774448952695e-06, "loss": 2.9245, "step": 38 }, { "epoch": 2.7610619469026547, "grad_norm": 0.423245370388031, "learning_rate": 2.330180397253473e-06, "loss": 2.3783, "step": 39 }, { "epoch": 2.831858407079646, "grad_norm": 0.5013735890388489, "learning_rate": 1.3152288061110518e-06, "loss": 3.1361, "step": 40 }, { "epoch": 2.9026548672566372, "grad_norm": 0.5441763997077942, "learning_rate": 5.859788109825793e-07, "loss": 2.8389, "step": 41 }, { "epoch": 2.9734513274336285, "grad_norm": 0.7712870836257935, "learning_rate": 1.4670994081297795e-07, "loss": 3.0242, "step": 42 }, { "epoch": 3.0442477876106193, "grad_norm": 0.7878711223602295, "learning_rate": 0.0, "loss": 5.2321, "step": 43 } ], "logging_steps": 1, "max_steps": 43, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 25, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.112012643421389e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }