{ "best_metric": 2.2425832748413086, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.6517311608961304, "eval_steps": 25, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.013034623217922607, "grad_norm": 0.6276217103004456, "learning_rate": 7.5e-05, "loss": 2.9674, "step": 1 }, { "epoch": 0.013034623217922607, "eval_loss": 3.5546200275421143, "eval_runtime": 0.2814, "eval_samples_per_second": 177.672, "eval_steps_per_second": 46.195, "step": 1 }, { "epoch": 0.026069246435845215, "grad_norm": 0.6501481533050537, "learning_rate": 0.00015, "loss": 3.2018, "step": 2 }, { "epoch": 0.039103869653767824, "grad_norm": 0.5839198231697083, "learning_rate": 0.000225, "loss": 3.3157, "step": 3 }, { "epoch": 0.05213849287169043, "grad_norm": 0.5366824865341187, "learning_rate": 0.0003, "loss": 3.3337, "step": 4 }, { "epoch": 0.06517311608961303, "grad_norm": 0.4950920641422272, "learning_rate": 0.0002999703922691041, "loss": 3.3257, "step": 5 }, { "epoch": 0.07820773930753565, "grad_norm": 0.4459711015224457, "learning_rate": 0.00029988158206334587, "loss": 3.2473, "step": 6 }, { "epoch": 0.09124236252545825, "grad_norm": 0.5029334425926208, "learning_rate": 0.00029973360833781664, "loss": 3.251, "step": 7 }, { "epoch": 0.10427698574338086, "grad_norm": 0.5529475808143616, "learning_rate": 0.0002995265359986831, "loss": 3.212, "step": 8 }, { "epoch": 0.11731160896130347, "grad_norm": 0.6158551573753357, "learning_rate": 0.00029926045587471686, "loss": 3.1893, "step": 9 }, { "epoch": 0.13034623217922606, "grad_norm": 0.6573907136917114, "learning_rate": 0.0002989354846774545, "loss": 3.1782, "step": 10 }, { "epoch": 0.14338085539714868, "grad_norm": 0.6919009685516357, "learning_rate": 0.0002985517649500034, "loss": 3.0629, "step": 11 }, { "epoch": 0.1564154786150713, "grad_norm": 0.7953146696090698, "learning_rate": 0.00029810946500451814, "loss": 3.132, "step": 12 }, { "epoch": 0.1694501018329939, "grad_norm": 0.9705007672309875, "learning_rate": 0.00029760877884837294, "loss": 2.7258, "step": 13 }, { "epoch": 0.1824847250509165, "grad_norm": 0.9804863333702087, "learning_rate": 0.0002970499260990637, "loss": 2.7512, "step": 14 }, { "epoch": 0.1955193482688391, "grad_norm": 0.6821589469909668, "learning_rate": 0.0002964331518878766, "loss": 2.6437, "step": 15 }, { "epoch": 0.20855397148676172, "grad_norm": 0.4936249554157257, "learning_rate": 0.0002957587267523652, "loss": 2.6137, "step": 16 }, { "epoch": 0.2215885947046843, "grad_norm": 0.5652141571044922, "learning_rate": 0.00029502694651768383, "loss": 2.6432, "step": 17 }, { "epoch": 0.23462321792260693, "grad_norm": 0.6153628826141357, "learning_rate": 0.000294238132166829, "loss": 2.6485, "step": 18 }, { "epoch": 0.24765784114052952, "grad_norm": 0.4337889552116394, "learning_rate": 0.0002933926296998457, "loss": 2.5919, "step": 19 }, { "epoch": 0.2606924643584521, "grad_norm": 0.5410802364349365, "learning_rate": 0.0002924908099820599, "loss": 2.6103, "step": 20 }, { "epoch": 0.27372708757637476, "grad_norm": 0.5030240416526794, "learning_rate": 0.00029153306858140533, "loss": 2.6104, "step": 21 }, { "epoch": 0.28676171079429735, "grad_norm": 0.38985803723335266, "learning_rate": 0.00029051982559491393, "loss": 2.5794, "step": 22 }, { "epoch": 0.29979633401221994, "grad_norm": 0.5979215502738953, "learning_rate": 0.00028945152546444754, "loss": 2.6287, "step": 23 }, { "epoch": 0.3128309572301426, "grad_norm": 0.5737847089767456, "learning_rate": 0.0002883286367817511, "loss": 2.6541, "step": 24 }, { "epoch": 0.3258655804480652, "grad_norm": 0.572758138179779, "learning_rate": 0.00028715165208291265, "loss": 2.6881, "step": 25 }, { "epoch": 0.3258655804480652, "eval_loss": 2.5153815746307373, "eval_runtime": 0.2797, "eval_samples_per_second": 178.771, "eval_steps_per_second": 46.481, "step": 25 }, { "epoch": 0.3389002036659878, "grad_norm": 1.4206351041793823, "learning_rate": 0.0002859210876323207, "loss": 2.4658, "step": 26 }, { "epoch": 0.35193482688391037, "grad_norm": 1.5302618741989136, "learning_rate": 0.00028463748319621396, "loss": 2.5304, "step": 27 }, { "epoch": 0.364969450101833, "grad_norm": 0.9723348021507263, "learning_rate": 0.00028330140180592156, "loss": 2.4213, "step": 28 }, { "epoch": 0.3780040733197556, "grad_norm": 0.4521373212337494, "learning_rate": 0.0002819134295108992, "loss": 2.4048, "step": 29 }, { "epoch": 0.3910386965376782, "grad_norm": 0.6521058678627014, "learning_rate": 0.00028047417512166837, "loss": 2.4019, "step": 30 }, { "epoch": 0.4040733197556008, "grad_norm": 0.7668533325195312, "learning_rate": 0.00027898426994277204, "loss": 2.4534, "step": 31 }, { "epoch": 0.41710794297352344, "grad_norm": 0.791877269744873, "learning_rate": 0.0002774443674958634, "loss": 2.4232, "step": 32 }, { "epoch": 0.43014256619144603, "grad_norm": 0.814365565776825, "learning_rate": 0.00027585514323305, "loss": 2.4323, "step": 33 }, { "epoch": 0.4431771894093686, "grad_norm": 0.6510496735572815, "learning_rate": 0.00027421729424061787, "loss": 2.3904, "step": 34 }, { "epoch": 0.45621181262729127, "grad_norm": 0.42264309525489807, "learning_rate": 0.00027253153893326646, "loss": 2.4016, "step": 35 }, { "epoch": 0.46924643584521386, "grad_norm": 0.5813013911247253, "learning_rate": 0.0002707986167389884, "loss": 2.4511, "step": 36 }, { "epoch": 0.48228105906313645, "grad_norm": 0.8300145864486694, "learning_rate": 0.0002690192877747315, "loss": 2.4128, "step": 37 }, { "epoch": 0.49531568228105904, "grad_norm": 0.7689920663833618, "learning_rate": 0.0002671943325129871, "loss": 2.3281, "step": 38 }, { "epoch": 0.5083503054989816, "grad_norm": 1.024122953414917, "learning_rate": 0.0002653245514394482, "loss": 2.3381, "step": 39 }, { "epoch": 0.5213849287169042, "grad_norm": 0.6759045720100403, "learning_rate": 0.0002634107647018905, "loss": 2.3008, "step": 40 }, { "epoch": 0.5344195519348269, "grad_norm": 0.3922083377838135, "learning_rate": 0.0002614538117504284, "loss": 2.2719, "step": 41 }, { "epoch": 0.5474541751527495, "grad_norm": 0.5142776966094971, "learning_rate": 0.0002594545509693043, "loss": 2.276, "step": 42 }, { "epoch": 0.5604887983706721, "grad_norm": 0.6373997926712036, "learning_rate": 0.00025741385930037295, "loss": 2.2602, "step": 43 }, { "epoch": 0.5735234215885947, "grad_norm": 0.6760398149490356, "learning_rate": 0.00025533263185844587, "loss": 2.3169, "step": 44 }, { "epoch": 0.5865580448065173, "grad_norm": 0.7617467045783997, "learning_rate": 0.00025321178153866423, "loss": 2.2778, "step": 45 }, { "epoch": 0.5995926680244399, "grad_norm": 0.7649106979370117, "learning_rate": 0.00025105223861607306, "loss": 2.2692, "step": 46 }, { "epoch": 0.6126272912423625, "grad_norm": 0.6090757250785828, "learning_rate": 0.0002488549503375719, "loss": 2.2845, "step": 47 }, { "epoch": 0.6256619144602852, "grad_norm": 0.45042526721954346, "learning_rate": 0.0002466208805064206, "loss": 2.2431, "step": 48 }, { "epoch": 0.6386965376782078, "grad_norm": 0.46896523237228394, "learning_rate": 0.00024435100905948387, "loss": 2.2985, "step": 49 }, { "epoch": 0.6517311608961304, "grad_norm": 0.5649595260620117, "learning_rate": 0.00024204633163739828, "loss": 2.4211, "step": 50 }, { "epoch": 0.6517311608961304, "eval_loss": 2.2425832748413086, "eval_runtime": 0.2798, "eval_samples_per_second": 178.69, "eval_steps_per_second": 46.46, "step": 50 } ], "logging_steps": 1, "max_steps": 154, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.22140137734144e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }