{ "best_metric": 0.26686012744903564, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.7733204446592556, "eval_steps": 25, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.015466408893185114, "grad_norm": 0.34002751111984253, "learning_rate": 2.9999999999999997e-05, "loss": 0.4411, "step": 1 }, { "epoch": 0.015466408893185114, "eval_loss": 1.1426082849502563, "eval_runtime": 7.1707, "eval_samples_per_second": 6.973, "eval_steps_per_second": 6.973, "step": 1 }, { "epoch": 0.030932817786370227, "grad_norm": 0.4139275550842285, "learning_rate": 5.9999999999999995e-05, "loss": 0.4555, "step": 2 }, { "epoch": 0.04639922667955534, "grad_norm": 0.481924831867218, "learning_rate": 8.999999999999999e-05, "loss": 0.424, "step": 3 }, { "epoch": 0.061865635572740454, "grad_norm": 0.4299992322921753, "learning_rate": 0.00011999999999999999, "loss": 0.3855, "step": 4 }, { "epoch": 0.07733204446592556, "grad_norm": 0.42819246649742126, "learning_rate": 0.00015, "loss": 0.4611, "step": 5 }, { "epoch": 0.09279845335911067, "grad_norm": 0.9149371385574341, "learning_rate": 0.00017999999999999998, "loss": 0.7301, "step": 6 }, { "epoch": 0.1082648622522958, "grad_norm": 1.1431907415390015, "learning_rate": 0.00020999999999999998, "loss": 0.7905, "step": 7 }, { "epoch": 0.12373127114548091, "grad_norm": 1.802942156791687, "learning_rate": 0.00023999999999999998, "loss": 0.6581, "step": 8 }, { "epoch": 0.13919768003866603, "grad_norm": 2.6315770149230957, "learning_rate": 0.00027, "loss": 0.6625, "step": 9 }, { "epoch": 0.15466408893185113, "grad_norm": 1.5995557308197021, "learning_rate": 0.0003, "loss": 0.5518, "step": 10 }, { "epoch": 0.17013049782503625, "grad_norm": 1.1412254571914673, "learning_rate": 0.00029994859874633357, "loss": 0.4248, "step": 11 }, { "epoch": 0.18559690671822135, "grad_norm": 1.1108283996582031, "learning_rate": 0.000299794430213186, "loss": 0.4044, "step": 12 }, { "epoch": 0.20106331561140647, "grad_norm": 1.4058749675750732, "learning_rate": 0.00029953760005996916, "loss": 0.4258, "step": 13 }, { "epoch": 0.2165297245045916, "grad_norm": 0.8541085124015808, "learning_rate": 0.00029917828430524096, "loss": 0.419, "step": 14 }, { "epoch": 0.2319961333977767, "grad_norm": 0.7577608823776245, "learning_rate": 0.00029871672920607153, "loss": 0.4055, "step": 15 }, { "epoch": 0.24746254229096182, "grad_norm": 0.8010592460632324, "learning_rate": 0.00029815325108927063, "loss": 0.4587, "step": 16 }, { "epoch": 0.26292895118414694, "grad_norm": 2.178513526916504, "learning_rate": 0.00029748823613459316, "loss": 0.3844, "step": 17 }, { "epoch": 0.27839536007733207, "grad_norm": 1.049085021018982, "learning_rate": 0.0002967221401100708, "loss": 0.3085, "step": 18 }, { "epoch": 0.29386176897051713, "grad_norm": 0.2664034962654114, "learning_rate": 0.0002958554880596515, "loss": 0.2314, "step": 19 }, { "epoch": 0.30932817786370226, "grad_norm": 0.9501022696495056, "learning_rate": 0.0002948888739433602, "loss": 0.2162, "step": 20 }, { "epoch": 0.3247945867568874, "grad_norm": 0.29297515749931335, "learning_rate": 0.00029382296023022894, "loss": 0.1734, "step": 21 }, { "epoch": 0.3402609956500725, "grad_norm": 0.29325342178344727, "learning_rate": 0.00029265847744427303, "loss": 0.2472, "step": 22 }, { "epoch": 0.35572740454325763, "grad_norm": 0.7397837042808533, "learning_rate": 0.00029139622366382674, "loss": 0.3872, "step": 23 }, { "epoch": 0.3711938134364427, "grad_norm": 0.5978482365608215, "learning_rate": 0.00029003706397458023, "loss": 0.3756, "step": 24 }, { "epoch": 0.3866602223296278, "grad_norm": 0.8143917918205261, "learning_rate": 0.000288581929876693, "loss": 0.3305, "step": 25 }, { "epoch": 0.3866602223296278, "eval_loss": 0.2919376492500305, "eval_runtime": 7.3131, "eval_samples_per_second": 6.837, "eval_steps_per_second": 6.837, "step": 25 }, { "epoch": 0.40212663122281295, "grad_norm": 0.5971204042434692, "learning_rate": 0.0002870318186463901, "loss": 0.329, "step": 26 }, { "epoch": 0.41759304011599807, "grad_norm": 0.5656357407569885, "learning_rate": 0.0002853877926524791, "loss": 0.2991, "step": 27 }, { "epoch": 0.4330594490091832, "grad_norm": 0.9046909809112549, "learning_rate": 0.00028365097862825513, "loss": 0.3522, "step": 28 }, { "epoch": 0.4485258579023683, "grad_norm": 1.8343451023101807, "learning_rate": 0.00028182256689929475, "loss": 0.3253, "step": 29 }, { "epoch": 0.4639922667955534, "grad_norm": 1.3387500047683716, "learning_rate": 0.0002799038105676658, "loss": 0.3363, "step": 30 }, { "epoch": 0.4794586756887385, "grad_norm": 0.7546955943107605, "learning_rate": 0.0002778960246531138, "loss": 0.3319, "step": 31 }, { "epoch": 0.49492508458192364, "grad_norm": 0.5383890271186829, "learning_rate": 0.0002758005851918136, "loss": 0.4198, "step": 32 }, { "epoch": 0.5103914934751087, "grad_norm": 2.9426941871643066, "learning_rate": 0.0002736189282933023, "loss": 0.3846, "step": 33 }, { "epoch": 0.5258579023682939, "grad_norm": 1.3911627531051636, "learning_rate": 0.0002713525491562421, "loss": 0.2681, "step": 34 }, { "epoch": 0.541324311261479, "grad_norm": 0.33964619040489197, "learning_rate": 0.00026900300104368524, "loss": 0.2078, "step": 35 }, { "epoch": 0.5567907201546641, "grad_norm": 0.6098275780677795, "learning_rate": 0.0002665718942185456, "loss": 0.2027, "step": 36 }, { "epoch": 0.5722571290478492, "grad_norm": 0.5025777816772461, "learning_rate": 0.00026406089484000466, "loss": 0.206, "step": 37 }, { "epoch": 0.5877235379410343, "grad_norm": 0.4298511743545532, "learning_rate": 0.00026147172382160914, "loss": 0.2803, "step": 38 }, { "epoch": 0.6031899468342194, "grad_norm": 0.9432958364486694, "learning_rate": 0.00025880615565184313, "loss": 0.3849, "step": 39 }, { "epoch": 0.6186563557274045, "grad_norm": 0.6062995791435242, "learning_rate": 0.00025606601717798207, "loss": 0.3508, "step": 40 }, { "epoch": 0.6341227646205897, "grad_norm": 0.5804493427276611, "learning_rate": 0.0002532531863540631, "loss": 0.3212, "step": 41 }, { "epoch": 0.6495891735137748, "grad_norm": 0.5816200375556946, "learning_rate": 0.0002503695909538287, "loss": 0.2876, "step": 42 }, { "epoch": 0.6650555824069598, "grad_norm": 0.4420316219329834, "learning_rate": 0.0002474172072495275, "loss": 0.3054, "step": 43 }, { "epoch": 0.680521991300145, "grad_norm": 0.45776253938674927, "learning_rate": 0.0002443980586574756, "loss": 0.279, "step": 44 }, { "epoch": 0.6959884001933301, "grad_norm": 0.7748484015464783, "learning_rate": 0.00024131421435130807, "loss": 0.2875, "step": 45 }, { "epoch": 0.7114548090865153, "grad_norm": 0.5803005695343018, "learning_rate": 0.00023816778784387094, "loss": 0.3088, "step": 46 }, { "epoch": 0.7269212179797003, "grad_norm": 1.1293902397155762, "learning_rate": 0.0002349609355387249, "loss": 0.3188, "step": 47 }, { "epoch": 0.7423876268728854, "grad_norm": 1.2384657859802246, "learning_rate": 0.00023169585525225405, "loss": 0.4065, "step": 48 }, { "epoch": 0.7578540357660706, "grad_norm": 1.8807259798049927, "learning_rate": 0.0002283747847073923, "loss": 0.2961, "step": 49 }, { "epoch": 0.7733204446592556, "grad_norm": 0.7909408211708069, "learning_rate": 0.000225, "loss": 0.2371, "step": 50 }, { "epoch": 0.7733204446592556, "eval_loss": 0.26686012744903564, "eval_runtime": 7.3174, "eval_samples_per_second": 6.833, "eval_steps_per_second": 6.833, "step": 50 } ], "logging_steps": 1, "max_steps": 130, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 7.89733217355694e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }