{ "best_metric": 0.733076810836792, "best_model_checkpoint": "miner_id_24/checkpoint-100", "epoch": 0.03545392098207361, "eval_steps": 25, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0003545392098207361, "grad_norm": 4.848531246185303, "learning_rate": 2e-05, "loss": 5.0876, "step": 1 }, { "epoch": 0.0003545392098207361, "eval_loss": 6.216423511505127, "eval_runtime": 1.4346, "eval_samples_per_second": 34.853, "eval_steps_per_second": 9.062, "step": 1 }, { "epoch": 0.0007090784196414722, "grad_norm": 5.189554691314697, "learning_rate": 4e-05, "loss": 4.9835, "step": 2 }, { "epoch": 0.0010636176294622083, "grad_norm": 5.395487308502197, "learning_rate": 6e-05, "loss": 5.0002, "step": 3 }, { "epoch": 0.0014181568392829444, "grad_norm": 5.79182767868042, "learning_rate": 8e-05, "loss": 4.815, "step": 4 }, { "epoch": 0.0017726960491036806, "grad_norm": 5.924882411956787, "learning_rate": 0.0001, "loss": 3.5888, "step": 5 }, { "epoch": 0.0021272352589244165, "grad_norm": 5.22299861907959, "learning_rate": 9.997539658034168e-05, "loss": 2.5276, "step": 6 }, { "epoch": 0.002481774468745153, "grad_norm": 3.220250129699707, "learning_rate": 9.990161322484486e-05, "loss": 1.5851, "step": 7 }, { "epoch": 0.002836313678565889, "grad_norm": 1.6759289503097534, "learning_rate": 9.977873061452552e-05, "loss": 1.2539, "step": 8 }, { "epoch": 0.003190852888386625, "grad_norm": 1.6135218143463135, "learning_rate": 9.96068831197139e-05, "loss": 1.0919, "step": 9 }, { "epoch": 0.003545392098207361, "grad_norm": 1.962496042251587, "learning_rate": 9.938625865312251e-05, "loss": 0.8399, "step": 10 }, { "epoch": 0.003899931308028097, "grad_norm": 2.1563808917999268, "learning_rate": 9.911709846436641e-05, "loss": 0.7126, "step": 11 }, { "epoch": 0.004254470517848833, "grad_norm": 1.3578228950500488, "learning_rate": 9.879969687616027e-05, "loss": 0.7229, "step": 12 }, { "epoch": 0.004609009727669569, "grad_norm": 1.3786768913269043, "learning_rate": 9.84344009624807e-05, "loss": 0.8936, "step": 13 }, { "epoch": 0.004963548937490306, "grad_norm": 1.6054021120071411, "learning_rate": 9.80216101690461e-05, "loss": 1.1911, "step": 14 }, { "epoch": 0.005318088147311041, "grad_norm": 0.9699863791465759, "learning_rate": 9.756177587652856e-05, "loss": 1.0015, "step": 15 }, { "epoch": 0.005672627357131778, "grad_norm": 1.0598102807998657, "learning_rate": 9.705540090697575e-05, "loss": 1.1119, "step": 16 }, { "epoch": 0.006027166566952514, "grad_norm": 1.0990904569625854, "learning_rate": 9.650303897398232e-05, "loss": 0.8657, "step": 17 }, { "epoch": 0.00638170577677325, "grad_norm": 0.9300882816314697, "learning_rate": 9.590529407721231e-05, "loss": 0.8144, "step": 18 }, { "epoch": 0.006736244986593986, "grad_norm": 0.7585983276367188, "learning_rate": 9.526281984193436e-05, "loss": 0.7869, "step": 19 }, { "epoch": 0.007090784196414722, "grad_norm": 0.83787602186203, "learning_rate": 9.4576318804292e-05, "loss": 0.7916, "step": 20 }, { "epoch": 0.007445323406235459, "grad_norm": 0.7473427653312683, "learning_rate": 9.384654164309083e-05, "loss": 0.6681, "step": 21 }, { "epoch": 0.007799862616056194, "grad_norm": 0.5894711017608643, "learning_rate": 9.30742863589421e-05, "loss": 0.675, "step": 22 }, { "epoch": 0.008154401825876931, "grad_norm": 0.715570867061615, "learning_rate": 9.226039740166091e-05, "loss": 0.6853, "step": 23 }, { "epoch": 0.008508941035697666, "grad_norm": 0.6784505844116211, "learning_rate": 9.140576474687264e-05, "loss": 0.4911, "step": 24 }, { "epoch": 0.008863480245518402, "grad_norm": 0.7639828324317932, "learning_rate": 9.051132292283771e-05, "loss": 0.4635, "step": 25 }, { "epoch": 0.008863480245518402, "eval_loss": 0.7849562168121338, "eval_runtime": 1.4348, "eval_samples_per_second": 34.849, "eval_steps_per_second": 9.061, "step": 25 }, { "epoch": 0.009218019455339139, "grad_norm": 0.6689695119857788, "learning_rate": 8.957804998855866e-05, "loss": 1.0456, "step": 26 }, { "epoch": 0.009572558665159875, "grad_norm": 0.8705013990402222, "learning_rate": 8.860696646428693e-05, "loss": 1.0945, "step": 27 }, { "epoch": 0.009927097874980612, "grad_norm": 0.7108039259910583, "learning_rate": 8.759913421559902e-05, "loss": 1.0126, "step": 28 }, { "epoch": 0.010281637084801348, "grad_norm": 0.8193414211273193, "learning_rate": 8.655565529226198e-05, "loss": 0.9494, "step": 29 }, { "epoch": 0.010636176294622083, "grad_norm": 0.8473891019821167, "learning_rate": 8.547767072315835e-05, "loss": 0.7592, "step": 30 }, { "epoch": 0.010990715504442819, "grad_norm": 0.7946711182594299, "learning_rate": 8.436635926858759e-05, "loss": 0.7088, "step": 31 }, { "epoch": 0.011345254714263555, "grad_norm": 0.6517184972763062, "learning_rate": 8.322293613130917e-05, "loss": 0.6003, "step": 32 }, { "epoch": 0.011699793924084292, "grad_norm": 0.6414614915847778, "learning_rate": 8.204865162773613e-05, "loss": 0.7046, "step": 33 }, { "epoch": 0.012054333133905028, "grad_norm": 0.6361235976219177, "learning_rate": 8.084478982073247e-05, "loss": 0.5813, "step": 34 }, { "epoch": 0.012408872343725764, "grad_norm": 0.7114998698234558, "learning_rate": 7.961266711550922e-05, "loss": 0.4539, "step": 35 }, { "epoch": 0.0127634115535465, "grad_norm": 0.5631694793701172, "learning_rate": 7.835363082015468e-05, "loss": 0.4299, "step": 36 }, { "epoch": 0.013117950763367235, "grad_norm": 0.6165447235107422, "learning_rate": 7.706905767237288e-05, "loss": 0.3928, "step": 37 }, { "epoch": 0.013472489973187972, "grad_norm": 0.7139920592308044, "learning_rate": 7.576035233404096e-05, "loss": 0.7725, "step": 38 }, { "epoch": 0.013827029183008708, "grad_norm": 0.7106257081031799, "learning_rate": 7.442894585523218e-05, "loss": 0.9712, "step": 39 }, { "epoch": 0.014181568392829445, "grad_norm": 0.7304429411888123, "learning_rate": 7.307629410938363e-05, "loss": 0.8644, "step": 40 }, { "epoch": 0.014536107602650181, "grad_norm": 0.6472069025039673, "learning_rate": 7.170387620131993e-05, "loss": 0.7419, "step": 41 }, { "epoch": 0.014890646812470917, "grad_norm": 0.8418741822242737, "learning_rate": 7.031319284987394e-05, "loss": 0.7852, "step": 42 }, { "epoch": 0.015245186022291652, "grad_norm": 0.7708389759063721, "learning_rate": 6.890576474687263e-05, "loss": 0.6849, "step": 43 }, { "epoch": 0.015599725232112388, "grad_norm": 0.7205597758293152, "learning_rate": 6.7483130894283e-05, "loss": 0.6503, "step": 44 }, { "epoch": 0.015954264441933125, "grad_norm": 0.731856107711792, "learning_rate": 6.604684692133597e-05, "loss": 0.645, "step": 45 }, { "epoch": 0.016308803651753863, "grad_norm": 0.7058491110801697, "learning_rate": 6.459848338346861e-05, "loss": 0.5082, "step": 46 }, { "epoch": 0.016663342861574598, "grad_norm": 0.7743491530418396, "learning_rate": 6.313962404494496e-05, "loss": 0.5277, "step": 47 }, { "epoch": 0.017017882071395332, "grad_norm": 0.9558595418930054, "learning_rate": 6.167186414703289e-05, "loss": 0.5754, "step": 48 }, { "epoch": 0.01737242128121607, "grad_norm": 0.8757135272026062, "learning_rate": 6.019680866363139e-05, "loss": 0.5726, "step": 49 }, { "epoch": 0.017726960491036805, "grad_norm": 0.6394727230072021, "learning_rate": 5.8716070546254966e-05, "loss": 0.3937, "step": 50 }, { "epoch": 0.017726960491036805, "eval_loss": 0.7727236747741699, "eval_runtime": 1.4316, "eval_samples_per_second": 34.927, "eval_steps_per_second": 9.081, "step": 50 }, { "epoch": 0.018081499700857543, "grad_norm": 0.724443793296814, "learning_rate": 5.7231268960295e-05, "loss": 0.9597, "step": 51 }, { "epoch": 0.018436038910678278, "grad_norm": 0.7282636761665344, "learning_rate": 5.574402751448614e-05, "loss": 0.9182, "step": 52 }, { "epoch": 0.018790578120499012, "grad_norm": 0.6671382188796997, "learning_rate": 5.425597248551387e-05, "loss": 0.7908, "step": 53 }, { "epoch": 0.01914511733031975, "grad_norm": 0.7780370116233826, "learning_rate": 5.2768731039705e-05, "loss": 0.8091, "step": 54 }, { "epoch": 0.019499656540140485, "grad_norm": 0.810192346572876, "learning_rate": 5.128392945374505e-05, "loss": 0.7749, "step": 55 }, { "epoch": 0.019854195749961223, "grad_norm": 0.7341954112052917, "learning_rate": 4.980319133636863e-05, "loss": 0.6715, "step": 56 }, { "epoch": 0.020208734959781958, "grad_norm": 0.7532359957695007, "learning_rate": 4.83281358529671e-05, "loss": 0.6982, "step": 57 }, { "epoch": 0.020563274169602696, "grad_norm": 0.7728514075279236, "learning_rate": 4.686037595505507e-05, "loss": 0.668, "step": 58 }, { "epoch": 0.02091781337942343, "grad_norm": 0.6659958958625793, "learning_rate": 4.54015166165314e-05, "loss": 0.5427, "step": 59 }, { "epoch": 0.021272352589244165, "grad_norm": 0.6720303893089294, "learning_rate": 4.395315307866405e-05, "loss": 0.466, "step": 60 }, { "epoch": 0.021626891799064903, "grad_norm": 0.6820637583732605, "learning_rate": 4.2516869105717004e-05, "loss": 0.429, "step": 61 }, { "epoch": 0.021981431008885638, "grad_norm": 0.8596479892730713, "learning_rate": 4.109423525312738e-05, "loss": 0.4649, "step": 62 }, { "epoch": 0.022335970218706376, "grad_norm": 0.6639199256896973, "learning_rate": 3.968680715012606e-05, "loss": 0.5984, "step": 63 }, { "epoch": 0.02269050942852711, "grad_norm": 0.7899975180625916, "learning_rate": 3.829612379868006e-05, "loss": 1.0047, "step": 64 }, { "epoch": 0.02304504863834785, "grad_norm": 0.878670871257782, "learning_rate": 3.692370589061639e-05, "loss": 0.9028, "step": 65 }, { "epoch": 0.023399587848168583, "grad_norm": 1.0611952543258667, "learning_rate": 3.557105414476782e-05, "loss": 0.925, "step": 66 }, { "epoch": 0.023754127057989318, "grad_norm": 0.7318688631057739, "learning_rate": 3.423964766595906e-05, "loss": 0.7136, "step": 67 }, { "epoch": 0.024108666267810056, "grad_norm": 0.7990502715110779, "learning_rate": 3.293094232762715e-05, "loss": 0.6471, "step": 68 }, { "epoch": 0.02446320547763079, "grad_norm": 0.7018380761146545, "learning_rate": 3.164636917984534e-05, "loss": 0.7014, "step": 69 }, { "epoch": 0.02481774468745153, "grad_norm": 0.6606969833374023, "learning_rate": 3.0387332884490805e-05, "loss": 0.6384, "step": 70 }, { "epoch": 0.025172283897272264, "grad_norm": 0.7007325291633606, "learning_rate": 2.9155210179267546e-05, "loss": 0.4989, "step": 71 }, { "epoch": 0.025526823107093, "grad_norm": 0.7680234313011169, "learning_rate": 2.7951348372263875e-05, "loss": 0.5995, "step": 72 }, { "epoch": 0.025881362316913736, "grad_norm": 0.712881326675415, "learning_rate": 2.677706386869083e-05, "loss": 0.4674, "step": 73 }, { "epoch": 0.02623590152673447, "grad_norm": 0.7732957005500793, "learning_rate": 2.5633640731412412e-05, "loss": 0.5283, "step": 74 }, { "epoch": 0.02659044073655521, "grad_norm": 0.6836099028587341, "learning_rate": 2.4522329276841663e-05, "loss": 0.3073, "step": 75 }, { "epoch": 0.02659044073655521, "eval_loss": 0.7449390292167664, "eval_runtime": 1.4311, "eval_samples_per_second": 34.938, "eval_steps_per_second": 9.084, "step": 75 }, { "epoch": 0.026944979946375944, "grad_norm": 0.6937525272369385, "learning_rate": 2.3444344707738015e-05, "loss": 0.9399, "step": 76 }, { "epoch": 0.027299519156196682, "grad_norm": 0.7247990965843201, "learning_rate": 2.2400865784401e-05, "loss": 0.8895, "step": 77 }, { "epoch": 0.027654058366017416, "grad_norm": 0.6984252333641052, "learning_rate": 2.1393033535713093e-05, "loss": 0.7613, "step": 78 }, { "epoch": 0.02800859757583815, "grad_norm": 0.6211410164833069, "learning_rate": 2.0421950011441354e-05, "loss": 0.6435, "step": 79 }, { "epoch": 0.02836313678565889, "grad_norm": 0.7938443422317505, "learning_rate": 1.9488677077162295e-05, "loss": 0.5725, "step": 80 }, { "epoch": 0.028717675995479624, "grad_norm": 0.6987906098365784, "learning_rate": 1.8594235253127375e-05, "loss": 0.69, "step": 81 }, { "epoch": 0.029072215205300362, "grad_norm": 0.8338001370429993, "learning_rate": 1.77396025983391e-05, "loss": 0.7627, "step": 82 }, { "epoch": 0.029426754415121097, "grad_norm": 0.6527311205863953, "learning_rate": 1.6925713641057904e-05, "loss": 0.5054, "step": 83 }, { "epoch": 0.029781293624941835, "grad_norm": 0.7132216691970825, "learning_rate": 1.6153458356909176e-05, "loss": 0.5969, "step": 84 }, { "epoch": 0.03013583283476257, "grad_norm": 0.7264847159385681, "learning_rate": 1.5423681195707997e-05, "loss": 0.5129, "step": 85 }, { "epoch": 0.030490372044583304, "grad_norm": 0.7762057781219482, "learning_rate": 1.4737180158065644e-05, "loss": 0.4313, "step": 86 }, { "epoch": 0.030844911254404042, "grad_norm": 0.6852719187736511, "learning_rate": 1.4094705922787687e-05, "loss": 0.4345, "step": 87 }, { "epoch": 0.031199450464224777, "grad_norm": 0.7660951018333435, "learning_rate": 1.3496961026017687e-05, "loss": 0.724, "step": 88 }, { "epoch": 0.03155398967404551, "grad_norm": 0.7194827198982239, "learning_rate": 1.2944599093024267e-05, "loss": 0.8667, "step": 89 }, { "epoch": 0.03190852888386625, "grad_norm": 0.7124308943748474, "learning_rate": 1.2438224123471442e-05, "loss": 0.7907, "step": 90 }, { "epoch": 0.03226306809368699, "grad_norm": 0.8714865446090698, "learning_rate": 1.1978389830953907e-05, "loss": 0.8492, "step": 91 }, { "epoch": 0.032617607303507726, "grad_norm": 0.757225513458252, "learning_rate": 1.1565599037519316e-05, "loss": 0.742, "step": 92 }, { "epoch": 0.03297214651332846, "grad_norm": 0.754546046257019, "learning_rate": 1.1200303123839742e-05, "loss": 0.6044, "step": 93 }, { "epoch": 0.033326685723149195, "grad_norm": 0.770008385181427, "learning_rate": 1.088290153563358e-05, "loss": 0.6478, "step": 94 }, { "epoch": 0.03368122493296993, "grad_norm": 0.8305578231811523, "learning_rate": 1.0613741346877497e-05, "loss": 0.647, "step": 95 }, { "epoch": 0.034035764142790664, "grad_norm": 0.9594391584396362, "learning_rate": 1.0393116880286118e-05, "loss": 0.6551, "step": 96 }, { "epoch": 0.0343903033526114, "grad_norm": 0.823460042476654, "learning_rate": 1.0221269385474488e-05, "loss": 0.5407, "step": 97 }, { "epoch": 0.03474484256243214, "grad_norm": 0.8756825923919678, "learning_rate": 1.0098386775155147e-05, "loss": 0.4781, "step": 98 }, { "epoch": 0.03509938177225287, "grad_norm": 0.7407690286636353, "learning_rate": 1.0024603419658329e-05, "loss": 0.4027, "step": 99 }, { "epoch": 0.03545392098207361, "grad_norm": 0.6229119300842285, "learning_rate": 1e-05, "loss": 0.276, "step": 100 }, { "epoch": 0.03545392098207361, "eval_loss": 0.733076810836792, "eval_runtime": 1.4314, "eval_samples_per_second": 34.932, "eval_steps_per_second": 9.082, "step": 100 } ], "logging_steps": 1, "max_steps": 100, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 25, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.1249873143390536e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }