{ "best_metric": 1.4121884107589722, "best_model_checkpoint": "miner_id_24/checkpoint-25", "epoch": 0.05830903790087463, "eval_steps": 5, "global_step": 25, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0023323615160349854, "grad_norm": 1.5346574783325195, "learning_rate": 2e-05, "loss": 2.2105, "step": 1 }, { "epoch": 0.0023323615160349854, "eval_loss": 1.6742810010910034, "eval_runtime": 36.5226, "eval_samples_per_second": 4.956, "eval_steps_per_second": 2.492, "step": 1 }, { "epoch": 0.004664723032069971, "grad_norm": 1.4139418601989746, "learning_rate": 4e-05, "loss": 1.4349, "step": 2 }, { "epoch": 0.006997084548104956, "grad_norm": 0.9706037044525146, "learning_rate": 6e-05, "loss": 0.9538, "step": 3 }, { "epoch": 0.009329446064139942, "grad_norm": 1.288402795791626, "learning_rate": 8e-05, "loss": 1.5789, "step": 4 }, { "epoch": 0.011661807580174927, "grad_norm": 1.1371190547943115, "learning_rate": 0.0001, "loss": 1.4299, "step": 5 }, { "epoch": 0.011661807580174927, "eval_loss": 1.5869524478912354, "eval_runtime": 36.6166, "eval_samples_per_second": 4.943, "eval_steps_per_second": 2.485, "step": 5 }, { "epoch": 0.013994169096209912, "grad_norm": 1.3522026538848877, "learning_rate": 0.00012, "loss": 2.1149, "step": 6 }, { "epoch": 0.0163265306122449, "grad_norm": 0.786292314529419, "learning_rate": 0.00014, "loss": 1.0683, "step": 7 }, { "epoch": 0.018658892128279883, "grad_norm": 0.6935855150222778, "learning_rate": 0.00016, "loss": 1.747, "step": 8 }, { "epoch": 0.02099125364431487, "grad_norm": 0.5766379237174988, "learning_rate": 0.00018, "loss": 1.4661, "step": 9 }, { "epoch": 0.023323615160349854, "grad_norm": 0.5700016021728516, "learning_rate": 0.0002, "loss": 1.0889, "step": 10 }, { "epoch": 0.023323615160349854, "eval_loss": 1.4536380767822266, "eval_runtime": 36.7676, "eval_samples_per_second": 4.923, "eval_steps_per_second": 2.475, "step": 10 }, { "epoch": 0.02565597667638484, "grad_norm": 0.7684414982795715, "learning_rate": 0.00019781476007338058, "loss": 1.7667, "step": 11 }, { "epoch": 0.027988338192419825, "grad_norm": 0.6388105154037476, "learning_rate": 0.0001913545457642601, "loss": 1.0316, "step": 12 }, { "epoch": 0.030320699708454812, "grad_norm": 0.6296985149383545, "learning_rate": 0.00018090169943749476, "loss": 1.4975, "step": 13 }, { "epoch": 0.0326530612244898, "grad_norm": 0.39826327562332153, "learning_rate": 0.00016691306063588583, "loss": 0.9386, "step": 14 }, { "epoch": 0.03498542274052478, "grad_norm": 0.5714980363845825, "learning_rate": 0.00015000000000000001, "loss": 1.6855, "step": 15 }, { "epoch": 0.03498542274052478, "eval_loss": 1.4195315837860107, "eval_runtime": 36.5975, "eval_samples_per_second": 4.946, "eval_steps_per_second": 2.487, "step": 15 }, { "epoch": 0.037317784256559766, "grad_norm": 0.6020028591156006, "learning_rate": 0.00013090169943749476, "loss": 1.8779, "step": 16 }, { "epoch": 0.03965014577259475, "grad_norm": 0.49338093400001526, "learning_rate": 0.00011045284632676536, "loss": 1.465, "step": 17 }, { "epoch": 0.04198250728862974, "grad_norm": 0.7286747694015503, "learning_rate": 8.954715367323468e-05, "loss": 1.2786, "step": 18 }, { "epoch": 0.044314868804664724, "grad_norm": 0.5588726997375488, "learning_rate": 6.909830056250527e-05, "loss": 1.398, "step": 19 }, { "epoch": 0.04664723032069971, "grad_norm": 0.5118404626846313, "learning_rate": 5.000000000000002e-05, "loss": 1.3029, "step": 20 }, { "epoch": 0.04664723032069971, "eval_loss": 1.4150354862213135, "eval_runtime": 36.6336, "eval_samples_per_second": 4.941, "eval_steps_per_second": 2.484, "step": 20 }, { "epoch": 0.04897959183673469, "grad_norm": 0.6569919586181641, "learning_rate": 3.308693936411421e-05, "loss": 2.004, "step": 21 }, { "epoch": 0.05131195335276968, "grad_norm": 0.45704540610313416, "learning_rate": 1.9098300562505266e-05, "loss": 1.1221, "step": 22 }, { "epoch": 0.053644314868804666, "grad_norm": 0.5303517580032349, "learning_rate": 8.645454235739903e-06, "loss": 1.7539, "step": 23 }, { "epoch": 0.05597667638483965, "grad_norm": 0.5232126116752625, "learning_rate": 2.1852399266194314e-06, "loss": 1.5575, "step": 24 }, { "epoch": 0.05830903790087463, "grad_norm": 0.5050119161605835, "learning_rate": 0.0, "loss": 1.474, "step": 25 }, { "epoch": 0.05830903790087463, "eval_loss": 1.4121884107589722, "eval_runtime": 36.7496, "eval_samples_per_second": 4.925, "eval_steps_per_second": 2.476, "step": 25 } ], "logging_steps": 1, "max_steps": 25, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 2, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2487984220274688.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }