{ "best_metric": 0.4162628650665283, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.7586533902323376, "eval_steps": 25, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.015173067804646752, "grad_norm": 17.388423919677734, "learning_rate": 2.9999999999999997e-05, "loss": 30.457, "step": 1 }, { "epoch": 0.015173067804646752, "eval_loss": 0.8401981592178345, "eval_runtime": 4.1216, "eval_samples_per_second": 12.131, "eval_steps_per_second": 12.131, "step": 1 }, { "epoch": 0.030346135609293504, "grad_norm": 10.178067207336426, "learning_rate": 5.9999999999999995e-05, "loss": 30.335, "step": 2 }, { "epoch": 0.04551920341394026, "grad_norm": 10.258736610412598, "learning_rate": 8.999999999999999e-05, "loss": 28.3123, "step": 3 }, { "epoch": 0.06069227121858701, "grad_norm": 10.296838760375977, "learning_rate": 0.00011999999999999999, "loss": 27.9287, "step": 4 }, { "epoch": 0.07586533902323377, "grad_norm": 12.867237091064453, "learning_rate": 0.00015, "loss": 25.3598, "step": 5 }, { "epoch": 0.09103840682788052, "grad_norm": 12.431685447692871, "learning_rate": 0.00017999999999999998, "loss": 22.2146, "step": 6 }, { "epoch": 0.10621147463252727, "grad_norm": 13.944931983947754, "learning_rate": 0.00020999999999999998, "loss": 19.7453, "step": 7 }, { "epoch": 0.12138454243717402, "grad_norm": 23.274085998535156, "learning_rate": 0.00023999999999999998, "loss": 19.6562, "step": 8 }, { "epoch": 0.13655761024182078, "grad_norm": 23.921234130859375, "learning_rate": 0.00027, "loss": 15.7789, "step": 9 }, { "epoch": 0.15173067804646753, "grad_norm": 31.0799560546875, "learning_rate": 0.0003, "loss": 18.6639, "step": 10 }, { "epoch": 0.16690374585111428, "grad_norm": 27.46465301513672, "learning_rate": 0.00029995027012714694, "loss": 15.0467, "step": 11 }, { "epoch": 0.18207681365576103, "grad_norm": 21.228845596313477, "learning_rate": 0.00029980111348272456, "loss": 16.0005, "step": 12 }, { "epoch": 0.19724988146040778, "grad_norm": 17.686410903930664, "learning_rate": 0.00029955262896727894, "loss": 15.2921, "step": 13 }, { "epoch": 0.21242294926505453, "grad_norm": 16.29988670349121, "learning_rate": 0.00029920498134218835, "loss": 14.4487, "step": 14 }, { "epoch": 0.22759601706970128, "grad_norm": 18.719757080078125, "learning_rate": 0.0002987584011204152, "loss": 12.782, "step": 15 }, { "epoch": 0.24276908487434803, "grad_norm": 19.07723617553711, "learning_rate": 0.0002982131844136615, "loss": 12.9955, "step": 16 }, { "epoch": 0.2579421526789948, "grad_norm": 45.31391143798828, "learning_rate": 0.0002975696927360274, "loss": 23.8883, "step": 17 }, { "epoch": 0.27311522048364156, "grad_norm": 27.076311111450195, "learning_rate": 0.0002968283527643036, "loss": 21.5994, "step": 18 }, { "epoch": 0.2882882882882883, "grad_norm": 15.616254806518555, "learning_rate": 0.00029598965605505737, "loss": 18.592, "step": 19 }, { "epoch": 0.30346135609293506, "grad_norm": 18.812135696411133, "learning_rate": 0.000295054158718698, "loss": 16.3996, "step": 20 }, { "epoch": 0.3186344238975818, "grad_norm": 14.7747163772583, "learning_rate": 0.0002940224810507402, "loss": 16.5481, "step": 21 }, { "epoch": 0.33380749170222856, "grad_norm": 13.129542350769043, "learning_rate": 0.00029289530712050735, "loss": 14.6397, "step": 22 }, { "epoch": 0.3489805595068753, "grad_norm": 13.189936637878418, "learning_rate": 0.0002916733843175492, "loss": 14.889, "step": 23 }, { "epoch": 0.36415362731152207, "grad_norm": 11.230732917785645, "learning_rate": 0.000290357522856074, "loss": 13.3495, "step": 24 }, { "epoch": 0.3793266951161688, "grad_norm": 10.45602798461914, "learning_rate": 0.0002889485952377242, "loss": 13.97, "step": 25 }, { "epoch": 0.3793266951161688, "eval_loss": 0.4615178406238556, "eval_runtime": 4.1945, "eval_samples_per_second": 11.92, "eval_steps_per_second": 11.92, "step": 25 }, { "epoch": 0.39449976292081557, "grad_norm": 9.400847434997559, "learning_rate": 0.0002874475356730507, "loss": 13.0953, "step": 26 }, { "epoch": 0.4096728307254623, "grad_norm": 10.215126037597656, "learning_rate": 0.0002858553394620707, "loss": 14.0127, "step": 27 }, { "epoch": 0.42484589853010907, "grad_norm": 8.064435958862305, "learning_rate": 0.0002841730623343193, "loss": 12.4928, "step": 28 }, { "epoch": 0.4400189663347558, "grad_norm": 10.052416801452637, "learning_rate": 0.00028240181974883207, "loss": 14.195, "step": 29 }, { "epoch": 0.45519203413940257, "grad_norm": 9.893036842346191, "learning_rate": 0.00028054278615452326, "loss": 12.9534, "step": 30 }, { "epoch": 0.4703651019440493, "grad_norm": 9.579009056091309, "learning_rate": 0.0002785971942114498, "loss": 11.9894, "step": 31 }, { "epoch": 0.48553816974869607, "grad_norm": 10.615213394165039, "learning_rate": 0.0002765663339734778, "loss": 11.5054, "step": 32 }, { "epoch": 0.5007112375533428, "grad_norm": 18.542469024658203, "learning_rate": 0.0002744515520328928, "loss": 17.3979, "step": 33 }, { "epoch": 0.5158843053579896, "grad_norm": 15.90185260772705, "learning_rate": 0.00027225425062752165, "loss": 16.0495, "step": 34 }, { "epoch": 0.5310573731626363, "grad_norm": 12.792567253112793, "learning_rate": 0.0002699758867109579, "loss": 15.2643, "step": 35 }, { "epoch": 0.5462304409672831, "grad_norm": 9.845219612121582, "learning_rate": 0.0002676179709865066, "loss": 13.5142, "step": 36 }, { "epoch": 0.5614035087719298, "grad_norm": 12.22781753540039, "learning_rate": 0.00026518206690549, "loss": 14.2178, "step": 37 }, { "epoch": 0.5765765765765766, "grad_norm": 10.745471954345703, "learning_rate": 0.0002626697896305779, "loss": 12.9431, "step": 38 }, { "epoch": 0.5917496443812233, "grad_norm": 10.67679500579834, "learning_rate": 0.00026008280496482984, "loss": 15.1994, "step": 39 }, { "epoch": 0.6069227121858701, "grad_norm": 8.882881164550781, "learning_rate": 0.000257422828247159, "loss": 14.0005, "step": 40 }, { "epoch": 0.6220957799905168, "grad_norm": 9.826411247253418, "learning_rate": 0.00025469162321495147, "loss": 12.5264, "step": 41 }, { "epoch": 0.6372688477951636, "grad_norm": 8.515768051147461, "learning_rate": 0.00025189100083459397, "loss": 11.0442, "step": 42 }, { "epoch": 0.6524419155998104, "grad_norm": 9.0559720993042, "learning_rate": 0.00024902281810068475, "loss": 11.7402, "step": 43 }, { "epoch": 0.6676149834044571, "grad_norm": 9.556861877441406, "learning_rate": 0.0002460889768047263, "loss": 12.4502, "step": 44 }, { "epoch": 0.6827880512091038, "grad_norm": 8.837124824523926, "learning_rate": 0.0002430914222741134, "loss": 12.4087, "step": 45 }, { "epoch": 0.6979611190137506, "grad_norm": 15.878302574157715, "learning_rate": 0.00024003214208225522, "loss": 11.0582, "step": 46 }, { "epoch": 0.7131341868183974, "grad_norm": 9.87769603729248, "learning_rate": 0.00023691316473068452, "loss": 11.3891, "step": 47 }, { "epoch": 0.7283072546230441, "grad_norm": 8.35490608215332, "learning_rate": 0.00023373655830402968, "loss": 9.9121, "step": 48 }, { "epoch": 0.7434803224276908, "grad_norm": 17.935434341430664, "learning_rate": 0.00023050442909874007, "loss": 16.9965, "step": 49 }, { "epoch": 0.7586533902323376, "grad_norm": 15.285179138183594, "learning_rate": 0.00022721892022647462, "loss": 16.4137, "step": 50 }, { "epoch": 0.7586533902323376, "eval_loss": 0.4162628650665283, "eval_runtime": 4.1979, "eval_samples_per_second": 11.911, "eval_steps_per_second": 11.911, "step": 50 } ], "logging_steps": 1, "max_steps": 132, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 6.95507593986048e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }