{ "best_metric": 1.2625093460083008, "best_model_checkpoint": "miner_id_24/checkpoint-100", "epoch": 2.6079869600651997, "eval_steps": 25, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.026079869600651995, "grad_norm": 0.6620760560035706, "learning_rate": 2.5e-05, "loss": 1.2501, "step": 1 }, { "epoch": 0.026079869600651995, "eval_loss": 1.6212151050567627, "eval_runtime": 0.4971, "eval_samples_per_second": 100.592, "eval_steps_per_second": 26.154, "step": 1 }, { "epoch": 0.05215973920130399, "grad_norm": 1.0186434984207153, "learning_rate": 5e-05, "loss": 1.4369, "step": 2 }, { "epoch": 0.07823960880195599, "grad_norm": 0.7852559089660645, "learning_rate": 7.500000000000001e-05, "loss": 1.3094, "step": 3 }, { "epoch": 0.10431947840260798, "grad_norm": 0.6789484024047852, "learning_rate": 0.0001, "loss": 1.3378, "step": 4 }, { "epoch": 0.13039934800325997, "grad_norm": 0.5586358308792114, "learning_rate": 9.997590643643647e-05, "loss": 1.4974, "step": 5 }, { "epoch": 0.15647921760391198, "grad_norm": 0.3749651610851288, "learning_rate": 9.990365154573717e-05, "loss": 1.3339, "step": 6 }, { "epoch": 0.18255908720456399, "grad_norm": 0.42121371626853943, "learning_rate": 9.978331270024886e-05, "loss": 1.5307, "step": 7 }, { "epoch": 0.20863895680521596, "grad_norm": 2.6662514209747314, "learning_rate": 9.961501876182148e-05, "loss": 1.7291, "step": 8 }, { "epoch": 0.23471882640586797, "grad_norm": 0.906875491142273, "learning_rate": 9.939894994381957e-05, "loss": 1.7642, "step": 9 }, { "epoch": 0.26079869600651995, "grad_norm": 0.32841840386390686, "learning_rate": 9.913533761814537e-05, "loss": 1.1277, "step": 10 }, { "epoch": 0.28687856560717195, "grad_norm": 0.32854458689689636, "learning_rate": 9.882446406748002e-05, "loss": 1.2506, "step": 11 }, { "epoch": 0.31295843520782396, "grad_norm": 0.2924271523952484, "learning_rate": 9.846666218300807e-05, "loss": 1.3405, "step": 12 }, { "epoch": 0.33903830480847597, "grad_norm": 0.28709307312965393, "learning_rate": 9.80623151079494e-05, "loss": 1.276, "step": 13 }, { "epoch": 0.36511817440912797, "grad_norm": 0.26937562227249146, "learning_rate": 9.761185582727977e-05, "loss": 1.3337, "step": 14 }, { "epoch": 0.39119804400978, "grad_norm": 0.25019437074661255, "learning_rate": 9.711576670407965e-05, "loss": 1.3807, "step": 15 }, { "epoch": 0.4172779136104319, "grad_norm": 0.27004799246788025, "learning_rate": 9.657457896300791e-05, "loss": 1.3273, "step": 16 }, { "epoch": 0.44335778321108393, "grad_norm": 0.3201175034046173, "learning_rate": 9.598887212145291e-05, "loss": 1.5258, "step": 17 }, { "epoch": 0.46943765281173594, "grad_norm": 0.4178328216075897, "learning_rate": 9.535927336897098e-05, "loss": 1.6499, "step": 18 }, { "epoch": 0.49551752241238795, "grad_norm": 0.9188335537910461, "learning_rate": 9.468645689567598e-05, "loss": 1.846, "step": 19 }, { "epoch": 0.5215973920130399, "grad_norm": 0.2169221043586731, "learning_rate": 9.397114317029975e-05, "loss": 1.1765, "step": 20 }, { "epoch": 0.5476772616136919, "grad_norm": 0.2450340837240219, "learning_rate": 9.321409816869605e-05, "loss": 1.2586, "step": 21 }, { "epoch": 0.5737571312143439, "grad_norm": 0.2194407731294632, "learning_rate": 9.241613255361455e-05, "loss": 1.2651, "step": 22 }, { "epoch": 0.5998370008149959, "grad_norm": 0.216043621301651, "learning_rate": 9.157810080662269e-05, "loss": 1.2809, "step": 23 }, { "epoch": 0.6259168704156479, "grad_norm": 0.2400970458984375, "learning_rate": 9.070090031310558e-05, "loss": 1.3573, "step": 24 }, { "epoch": 0.6519967400162999, "grad_norm": 0.2485317438840866, "learning_rate": 8.978547040132317e-05, "loss": 1.2768, "step": 25 }, { "epoch": 0.6519967400162999, "eval_loss": 1.3168331384658813, "eval_runtime": 0.4947, "eval_samples_per_second": 101.078, "eval_steps_per_second": 26.28, "step": 25 }, { "epoch": 0.6780766096169519, "grad_norm": 0.858853280544281, "learning_rate": 8.883279133655399e-05, "loss": 1.4349, "step": 26 }, { "epoch": 0.7041564792176039, "grad_norm": 0.3207035958766937, "learning_rate": 8.78438832714026e-05, "loss": 1.5518, "step": 27 }, { "epoch": 0.7302363488182559, "grad_norm": 0.47130295634269714, "learning_rate": 8.681980515339464e-05, "loss": 1.5278, "step": 28 }, { "epoch": 0.756316218418908, "grad_norm": 0.18217793107032776, "learning_rate": 8.57616535910292e-05, "loss": 1.0693, "step": 29 }, { "epoch": 0.78239608801956, "grad_norm": 0.21631065011024475, "learning_rate": 8.467056167950311e-05, "loss": 1.3558, "step": 30 }, { "epoch": 0.8084759576202118, "grad_norm": 0.19864201545715332, "learning_rate": 8.354769778736406e-05, "loss": 1.3028, "step": 31 }, { "epoch": 0.8345558272208639, "grad_norm": 0.19812346994876862, "learning_rate": 8.239426430539243e-05, "loss": 1.3066, "step": 32 }, { "epoch": 0.8606356968215159, "grad_norm": 0.23884153366088867, "learning_rate": 8.12114963590511e-05, "loss": 1.3189, "step": 33 }, { "epoch": 0.8867155664221679, "grad_norm": 0.2092161476612091, "learning_rate": 8.000066048588211e-05, "loss": 1.273, "step": 34 }, { "epoch": 0.9127954360228199, "grad_norm": 0.26922526955604553, "learning_rate": 7.876305327926657e-05, "loss": 1.3761, "step": 35 }, { "epoch": 0.9388753056234719, "grad_norm": 0.2948414087295532, "learning_rate": 7.75e-05, "loss": 1.3617, "step": 36 }, { "epoch": 0.9649551752241239, "grad_norm": 0.44491636753082275, "learning_rate": 7.62128531571699e-05, "loss": 1.5529, "step": 37 }, { "epoch": 0.9910350448247759, "grad_norm": 0.7980100512504578, "learning_rate": 7.490299105985507e-05, "loss": 1.6605, "step": 38 }, { "epoch": 1.017114914425428, "grad_norm": 0.42900562286376953, "learning_rate": 7.357181634119777e-05, "loss": 1.9593, "step": 39 }, { "epoch": 1.0431947840260798, "grad_norm": 0.17168301343917847, "learning_rate": 7.222075445642904e-05, "loss": 1.1427, "step": 40 }, { "epoch": 1.069274653626732, "grad_norm": 0.18627986311912537, "learning_rate": 7.085125215645552e-05, "loss": 1.3079, "step": 41 }, { "epoch": 1.0953545232273838, "grad_norm": 0.19410377740859985, "learning_rate": 6.946477593864228e-05, "loss": 1.2263, "step": 42 }, { "epoch": 1.121434392828036, "grad_norm": 0.20103414356708527, "learning_rate": 6.80628104764508e-05, "loss": 1.29, "step": 43 }, { "epoch": 1.1475142624286878, "grad_norm": 0.229043647646904, "learning_rate": 6.664685702961344e-05, "loss": 1.2465, "step": 44 }, { "epoch": 1.17359413202934, "grad_norm": 1.0233486890792847, "learning_rate": 6.52184318365468e-05, "loss": 1.3942, "step": 45 }, { "epoch": 1.1996740016299918, "grad_norm": 0.29909205436706543, "learning_rate": 6.377906449072578e-05, "loss": 1.4629, "step": 46 }, { "epoch": 1.225753871230644, "grad_norm": 0.389396071434021, "learning_rate": 6.23302963027565e-05, "loss": 1.5736, "step": 47 }, { "epoch": 1.2518337408312958, "grad_norm": 0.2234639823436737, "learning_rate": 6.087367864990233e-05, "loss": 0.7327, "step": 48 }, { "epoch": 1.277913610431948, "grad_norm": 0.21025896072387695, "learning_rate": 5.941077131483025e-05, "loss": 1.5035, "step": 49 }, { "epoch": 1.3039934800325998, "grad_norm": 0.17033106088638306, "learning_rate": 5.794314081535644e-05, "loss": 1.1971, "step": 50 }, { "epoch": 1.3039934800325998, "eval_loss": 1.276837706565857, "eval_runtime": 0.4933, "eval_samples_per_second": 101.363, "eval_steps_per_second": 26.355, "step": 50 }, { "epoch": 1.3300733496332517, "grad_norm": 0.18216781318187714, "learning_rate": 5.6472358726979935e-05, "loss": 1.2561, "step": 51 }, { "epoch": 1.3561532192339039, "grad_norm": 0.19190214574337006, "learning_rate": 5.500000000000001e-05, "loss": 1.2373, "step": 52 }, { "epoch": 1.3822330888345558, "grad_norm": 0.20375579595565796, "learning_rate": 5.352764127302008e-05, "loss": 1.3094, "step": 53 }, { "epoch": 1.4083129584352079, "grad_norm": 0.22090604901313782, "learning_rate": 5.205685918464356e-05, "loss": 1.2524, "step": 54 }, { "epoch": 1.4343928280358598, "grad_norm": 0.2642119526863098, "learning_rate": 5.058922868516978e-05, "loss": 1.3616, "step": 55 }, { "epoch": 1.4604726976365119, "grad_norm": 0.34405508637428284, "learning_rate": 4.912632135009769e-05, "loss": 1.5443, "step": 56 }, { "epoch": 1.4865525672371638, "grad_norm": 0.5865151882171631, "learning_rate": 4.7669703697243516e-05, "loss": 1.7222, "step": 57 }, { "epoch": 1.5126324368378157, "grad_norm": 0.3257847726345062, "learning_rate": 4.6220935509274235e-05, "loss": 1.0614, "step": 58 }, { "epoch": 1.5387123064384678, "grad_norm": 0.15625827014446259, "learning_rate": 4.478156816345321e-05, "loss": 1.1532, "step": 59 }, { "epoch": 1.56479217603912, "grad_norm": 0.17408806085586548, "learning_rate": 4.3353142970386564e-05, "loss": 1.185, "step": 60 }, { "epoch": 1.5908720456397718, "grad_norm": 0.18832173943519592, "learning_rate": 4.19371895235492e-05, "loss": 1.1786, "step": 61 }, { "epoch": 1.6169519152404237, "grad_norm": 0.19872711598873138, "learning_rate": 4.053522406135775e-05, "loss": 1.2668, "step": 62 }, { "epoch": 1.6430317848410758, "grad_norm": 0.22072260081768036, "learning_rate": 3.9148747843544495e-05, "loss": 1.2622, "step": 63 }, { "epoch": 1.669111654441728, "grad_norm": 0.25179487466812134, "learning_rate": 3.777924554357096e-05, "loss": 1.3652, "step": 64 }, { "epoch": 1.6951915240423798, "grad_norm": 0.28402817249298096, "learning_rate": 3.642818365880224e-05, "loss": 1.4035, "step": 65 }, { "epoch": 1.7212713936430317, "grad_norm": 0.4112336337566376, "learning_rate": 3.509700894014496e-05, "loss": 1.5314, "step": 66 }, { "epoch": 1.7473512632436838, "grad_norm": 0.24410173296928406, "learning_rate": 3.378714684283011e-05, "loss": 0.8069, "step": 67 }, { "epoch": 1.7734311328443357, "grad_norm": 0.20584547519683838, "learning_rate": 3.250000000000001e-05, "loss": 1.4517, "step": 68 }, { "epoch": 1.7995110024449876, "grad_norm": 0.17557083070278168, "learning_rate": 3.123694672073344e-05, "loss": 1.216, "step": 69 }, { "epoch": 1.8255908720456397, "grad_norm": 0.18803009390830994, "learning_rate": 2.9999339514117912e-05, "loss": 1.2143, "step": 70 }, { "epoch": 1.8516707416462919, "grad_norm": 0.19694343209266663, "learning_rate": 2.8788503640948912e-05, "loss": 1.2274, "step": 71 }, { "epoch": 1.8777506112469438, "grad_norm": 0.20662301778793335, "learning_rate": 2.760573569460757e-05, "loss": 1.2995, "step": 72 }, { "epoch": 1.9038304808475957, "grad_norm": 0.22804853320121765, "learning_rate": 2.645230221263596e-05, "loss": 1.2843, "step": 73 }, { "epoch": 1.9299103504482478, "grad_norm": 0.2707391679286957, "learning_rate": 2.53294383204969e-05, "loss": 1.2768, "step": 74 }, { "epoch": 1.9559902200488999, "grad_norm": 0.37075576186180115, "learning_rate": 2.423834640897079e-05, "loss": 1.5192, "step": 75 }, { "epoch": 1.9559902200488999, "eval_loss": 1.2681915760040283, "eval_runtime": 0.4943, "eval_samples_per_second": 101.15, "eval_steps_per_second": 26.299, "step": 75 }, { "epoch": 1.9820700896495518, "grad_norm": 0.6423049569129944, "learning_rate": 2.3180194846605367e-05, "loss": 1.6123, "step": 76 }, { "epoch": 2.0081499592502037, "grad_norm": 0.43415480852127075, "learning_rate": 2.215611672859741e-05, "loss": 1.9664, "step": 77 }, { "epoch": 2.034229828850856, "grad_norm": 0.150677889585495, "learning_rate": 2.1167208663446025e-05, "loss": 1.0656, "step": 78 }, { "epoch": 2.060309698451508, "grad_norm": 0.17479360103607178, "learning_rate": 2.0214529598676836e-05, "loss": 1.1818, "step": 79 }, { "epoch": 2.0863895680521596, "grad_norm": 0.18362055718898773, "learning_rate": 1.9299099686894423e-05, "loss": 1.1829, "step": 80 }, { "epoch": 2.1124694376528117, "grad_norm": 0.20285598933696747, "learning_rate": 1.842189919337732e-05, "loss": 1.2863, "step": 81 }, { "epoch": 2.138549307253464, "grad_norm": 0.21474912762641907, "learning_rate": 1.758386744638546e-05, "loss": 1.3106, "step": 82 }, { "epoch": 2.164629176854116, "grad_norm": 0.24042028188705444, "learning_rate": 1.6785901831303956e-05, "loss": 1.2384, "step": 83 }, { "epoch": 2.1907090464547676, "grad_norm": 0.2615673243999481, "learning_rate": 1.602885682970026e-05, "loss": 1.328, "step": 84 }, { "epoch": 2.2167889160554197, "grad_norm": 0.377523273229599, "learning_rate": 1.531354310432403e-05, "loss": 1.5058, "step": 85 }, { "epoch": 2.242868785656072, "grad_norm": 0.32665538787841797, "learning_rate": 1.464072663102903e-05, "loss": 0.9337, "step": 86 }, { "epoch": 2.2689486552567235, "grad_norm": 0.20789040625095367, "learning_rate": 1.4011127878547087e-05, "loss": 1.4827, "step": 87 }, { "epoch": 2.2950285248573756, "grad_norm": 0.17642174661159515, "learning_rate": 1.3425421036992098e-05, "loss": 1.2114, "step": 88 }, { "epoch": 2.3211083944580277, "grad_norm": 0.17508623003959656, "learning_rate": 1.2884233295920353e-05, "loss": 1.2065, "step": 89 }, { "epoch": 2.34718826405868, "grad_norm": 0.18353621661663055, "learning_rate": 1.2388144172720251e-05, "loss": 1.2285, "step": 90 }, { "epoch": 2.3732681336593315, "grad_norm": 0.20404478907585144, "learning_rate": 1.1937684892050604e-05, "loss": 1.2864, "step": 91 }, { "epoch": 2.3993480032599837, "grad_norm": 0.22320792078971863, "learning_rate": 1.1533337816991932e-05, "loss": 1.2372, "step": 92 }, { "epoch": 2.4254278728606358, "grad_norm": 0.25220105051994324, "learning_rate": 1.1175535932519987e-05, "loss": 1.3136, "step": 93 }, { "epoch": 2.451507742461288, "grad_norm": 0.3090400695800781, "learning_rate": 1.0864662381854632e-05, "loss": 1.364, "step": 94 }, { "epoch": 2.4775876120619396, "grad_norm": 0.4767124056816101, "learning_rate": 1.0601050056180447e-05, "loss": 1.6821, "step": 95 }, { "epoch": 2.5036674816625917, "grad_norm": 0.4842507839202881, "learning_rate": 1.0384981238178534e-05, "loss": 1.1292, "step": 96 }, { "epoch": 2.529747351263244, "grad_norm": 0.14696048200130463, "learning_rate": 1.0216687299751144e-05, "loss": 1.1158, "step": 97 }, { "epoch": 2.555827220863896, "grad_norm": 0.37997448444366455, "learning_rate": 1.0096348454262845e-05, "loss": 1.2302, "step": 98 }, { "epoch": 2.5819070904645476, "grad_norm": 0.1806773990392685, "learning_rate": 1.0024093563563546e-05, "loss": 1.1778, "step": 99 }, { "epoch": 2.6079869600651997, "grad_norm": 0.20082679390907288, "learning_rate": 1e-05, "loss": 1.253, "step": 100 }, { "epoch": 2.6079869600651997, "eval_loss": 1.2625093460083008, "eval_runtime": 0.4948, "eval_samples_per_second": 101.042, "eval_steps_per_second": 26.271, "step": 100 } ], "logging_steps": 1, "max_steps": 100, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 25, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.08890810679296e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }