{ "best_metric": 2.3117122650146484, "best_model_checkpoint": "miner_id_24/checkpoint-25", "epoch": 0.01448225923244026, "eval_steps": 5, "global_step": 25, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0005792903692976104, "grad_norm": 2.838529109954834, "learning_rate": 2e-05, "loss": 12.5795, "step": 1 }, { "epoch": 0.0005792903692976104, "eval_loss": 3.121882677078247, "eval_runtime": 104.4145, "eval_samples_per_second": 6.963, "eval_steps_per_second": 3.486, "step": 1 }, { "epoch": 0.0011585807385952208, "grad_norm": 3.1008856296539307, "learning_rate": 4e-05, "loss": 12.8641, "step": 2 }, { "epoch": 0.0017378711078928314, "grad_norm": 3.484205484390259, "learning_rate": 6e-05, "loss": 11.9847, "step": 3 }, { "epoch": 0.0023171614771904415, "grad_norm": 3.1738972663879395, "learning_rate": 8e-05, "loss": 13.3681, "step": 4 }, { "epoch": 0.002896451846488052, "grad_norm": 2.6525840759277344, "learning_rate": 0.0001, "loss": 10.3859, "step": 5 }, { "epoch": 0.002896451846488052, "eval_loss": 3.0464909076690674, "eval_runtime": 102.3038, "eval_samples_per_second": 7.106, "eval_steps_per_second": 3.558, "step": 5 }, { "epoch": 0.0034757422157856628, "grad_norm": 4.349057197570801, "learning_rate": 0.00012, "loss": 13.4115, "step": 6 }, { "epoch": 0.004055032585083273, "grad_norm": 4.680722236633301, "learning_rate": 0.00014, "loss": 12.9696, "step": 7 }, { "epoch": 0.004634322954380883, "grad_norm": 3.7882301807403564, "learning_rate": 0.00016, "loss": 10.0891, "step": 8 }, { "epoch": 0.0052136133236784935, "grad_norm": 4.43908166885376, "learning_rate": 0.00018, "loss": 12.1125, "step": 9 }, { "epoch": 0.005792903692976104, "grad_norm": 3.733471393585205, "learning_rate": 0.0002, "loss": 9.6618, "step": 10 }, { "epoch": 0.005792903692976104, "eval_loss": 2.685049295425415, "eval_runtime": 101.5745, "eval_samples_per_second": 7.157, "eval_steps_per_second": 3.584, "step": 10 }, { "epoch": 0.006372194062273715, "grad_norm": 3.624699592590332, "learning_rate": 0.00019781476007338058, "loss": 10.4661, "step": 11 }, { "epoch": 0.0069514844315713255, "grad_norm": 4.032886981964111, "learning_rate": 0.0001913545457642601, "loss": 11.1481, "step": 12 }, { "epoch": 0.007530774800868936, "grad_norm": 3.667426347732544, "learning_rate": 0.00018090169943749476, "loss": 10.4183, "step": 13 }, { "epoch": 0.008110065170166545, "grad_norm": 3.7377707958221436, "learning_rate": 0.00016691306063588583, "loss": 10.7956, "step": 14 }, { "epoch": 0.008689355539464157, "grad_norm": 5.357843399047852, "learning_rate": 0.00015000000000000001, "loss": 9.9671, "step": 15 }, { "epoch": 0.008689355539464157, "eval_loss": 2.4640743732452393, "eval_runtime": 101.2592, "eval_samples_per_second": 7.18, "eval_steps_per_second": 3.595, "step": 15 }, { "epoch": 0.009268645908761766, "grad_norm": 3.6775741577148438, "learning_rate": 0.00013090169943749476, "loss": 8.4698, "step": 16 }, { "epoch": 0.009847936278059377, "grad_norm": 4.87345027923584, "learning_rate": 0.00011045284632676536, "loss": 9.1637, "step": 17 }, { "epoch": 0.010427226647356987, "grad_norm": 5.144161224365234, "learning_rate": 8.954715367323468e-05, "loss": 10.2837, "step": 18 }, { "epoch": 0.011006517016654598, "grad_norm": 4.957813262939453, "learning_rate": 6.909830056250527e-05, "loss": 9.8878, "step": 19 }, { "epoch": 0.011585807385952208, "grad_norm": 4.979409694671631, "learning_rate": 5.000000000000002e-05, "loss": 8.0892, "step": 20 }, { "epoch": 0.011585807385952208, "eval_loss": 2.3354403972625732, "eval_runtime": 101.2972, "eval_samples_per_second": 7.177, "eval_steps_per_second": 3.593, "step": 20 }, { "epoch": 0.012165097755249819, "grad_norm": 4.596272945404053, "learning_rate": 3.308693936411421e-05, "loss": 9.8555, "step": 21 }, { "epoch": 0.01274438812454743, "grad_norm": 6.18507194519043, "learning_rate": 1.9098300562505266e-05, "loss": 10.8075, "step": 22 }, { "epoch": 0.01332367849384504, "grad_norm": 3.897860288619995, "learning_rate": 8.645454235739903e-06, "loss": 9.3181, "step": 23 }, { "epoch": 0.013902968863142651, "grad_norm": 4.420831680297852, "learning_rate": 2.1852399266194314e-06, "loss": 10.5026, "step": 24 }, { "epoch": 0.01448225923244026, "grad_norm": 4.4558892250061035, "learning_rate": 0.0, "loss": 10.2569, "step": 25 }, { "epoch": 0.01448225923244026, "eval_loss": 2.3117122650146484, "eval_runtime": 101.5659, "eval_samples_per_second": 7.158, "eval_steps_per_second": 3.584, "step": 25 } ], "logging_steps": 1, "max_steps": 25, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 2, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7203333529927680.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }