{
  "best_metric": 0.26686012744903564,
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
  "epoch": 0.7733204446592556,
  "eval_steps": 25,
  "global_step": 50,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.015466408893185114,
      "grad_norm": 0.34002751111984253,
      "learning_rate": 2.9999999999999997e-05,
      "loss": 0.4411,
      "step": 1
    },
    {
      "epoch": 0.015466408893185114,
      "eval_loss": 1.1426082849502563,
      "eval_runtime": 7.1707,
      "eval_samples_per_second": 6.973,
      "eval_steps_per_second": 6.973,
      "step": 1
    },
    {
      "epoch": 0.030932817786370227,
      "grad_norm": 0.4139275550842285,
      "learning_rate": 5.9999999999999995e-05,
      "loss": 0.4555,
      "step": 2
    },
    {
      "epoch": 0.04639922667955534,
      "grad_norm": 0.481924831867218,
      "learning_rate": 8.999999999999999e-05,
      "loss": 0.424,
      "step": 3
    },
    {
      "epoch": 0.061865635572740454,
      "grad_norm": 0.4299992322921753,
      "learning_rate": 0.00011999999999999999,
      "loss": 0.3855,
      "step": 4
    },
    {
      "epoch": 0.07733204446592556,
      "grad_norm": 0.42819246649742126,
      "learning_rate": 0.00015,
      "loss": 0.4611,
      "step": 5
    },
    {
      "epoch": 0.09279845335911067,
      "grad_norm": 0.9149371385574341,
      "learning_rate": 0.00017999999999999998,
      "loss": 0.7301,
      "step": 6
    },
    {
      "epoch": 0.1082648622522958,
      "grad_norm": 1.1431907415390015,
      "learning_rate": 0.00020999999999999998,
      "loss": 0.7905,
      "step": 7
    },
    {
      "epoch": 0.12373127114548091,
      "grad_norm": 1.802942156791687,
      "learning_rate": 0.00023999999999999998,
      "loss": 0.6581,
      "step": 8
    },
    {
      "epoch": 0.13919768003866603,
      "grad_norm": 2.6315770149230957,
      "learning_rate": 0.00027,
      "loss": 0.6625,
      "step": 9
    },
    {
      "epoch": 0.15466408893185113,
      "grad_norm": 1.5995557308197021,
      "learning_rate": 0.0003,
      "loss": 0.5518,
      "step": 10
    },
    {
      "epoch": 0.17013049782503625,
      "grad_norm": 1.1412254571914673,
      "learning_rate": 0.00029994859874633357,
      "loss": 0.4248,
      "step": 11
    },
    {
      "epoch": 0.18559690671822135,
      "grad_norm": 1.1108283996582031,
      "learning_rate": 0.000299794430213186,
      "loss": 0.4044,
      "step": 12
    },
    {
      "epoch": 0.20106331561140647,
      "grad_norm": 1.4058749675750732,
      "learning_rate": 0.00029953760005996916,
      "loss": 0.4258,
      "step": 13
    },
    {
      "epoch": 0.2165297245045916,
      "grad_norm": 0.8541085124015808,
      "learning_rate": 0.00029917828430524096,
      "loss": 0.419,
      "step": 14
    },
    {
      "epoch": 0.2319961333977767,
      "grad_norm": 0.7577608823776245,
      "learning_rate": 0.00029871672920607153,
      "loss": 0.4055,
      "step": 15
    },
    {
      "epoch": 0.24746254229096182,
      "grad_norm": 0.8010592460632324,
      "learning_rate": 0.00029815325108927063,
      "loss": 0.4587,
      "step": 16
    },
    {
      "epoch": 0.26292895118414694,
      "grad_norm": 2.178513526916504,
      "learning_rate": 0.00029748823613459316,
      "loss": 0.3844,
      "step": 17
    },
    {
      "epoch": 0.27839536007733207,
      "grad_norm": 1.049085021018982,
      "learning_rate": 0.0002967221401100708,
      "loss": 0.3085,
      "step": 18
    },
    {
      "epoch": 0.29386176897051713,
      "grad_norm": 0.2664034962654114,
      "learning_rate": 0.0002958554880596515,
      "loss": 0.2314,
      "step": 19
    },
    {
      "epoch": 0.30932817786370226,
      "grad_norm": 0.9501022696495056,
      "learning_rate": 0.0002948888739433602,
      "loss": 0.2162,
      "step": 20
    },
    {
      "epoch": 0.3247945867568874,
      "grad_norm": 0.29297515749931335,
      "learning_rate": 0.00029382296023022894,
      "loss": 0.1734,
      "step": 21
    },
    {
      "epoch": 0.3402609956500725,
      "grad_norm": 0.29325342178344727,
      "learning_rate": 0.00029265847744427303,
      "loss": 0.2472,
      "step": 22
    },
    {
      "epoch": 0.35572740454325763,
      "grad_norm": 0.7397837042808533,
      "learning_rate": 0.00029139622366382674,
      "loss": 0.3872,
      "step": 23
    },
    {
      "epoch": 0.3711938134364427,
      "grad_norm": 0.5978482365608215,
      "learning_rate": 0.00029003706397458023,
      "loss": 0.3756,
      "step": 24
    },
    {
      "epoch": 0.3866602223296278,
      "grad_norm": 0.8143917918205261,
      "learning_rate": 0.000288581929876693,
      "loss": 0.3305,
      "step": 25
    },
    {
      "epoch": 0.3866602223296278,
      "eval_loss": 0.2919376492500305,
      "eval_runtime": 7.3131,
      "eval_samples_per_second": 6.837,
      "eval_steps_per_second": 6.837,
      "step": 25
    },
    {
      "epoch": 0.40212663122281295,
      "grad_norm": 0.5971204042434692,
      "learning_rate": 0.0002870318186463901,
      "loss": 0.329,
      "step": 26
    },
    {
      "epoch": 0.41759304011599807,
      "grad_norm": 0.5656357407569885,
      "learning_rate": 0.0002853877926524791,
      "loss": 0.2991,
      "step": 27
    },
    {
      "epoch": 0.4330594490091832,
      "grad_norm": 0.9046909809112549,
      "learning_rate": 0.00028365097862825513,
      "loss": 0.3522,
      "step": 28
    },
    {
      "epoch": 0.4485258579023683,
      "grad_norm": 1.8343451023101807,
      "learning_rate": 0.00028182256689929475,
      "loss": 0.3253,
      "step": 29
    },
    {
      "epoch": 0.4639922667955534,
      "grad_norm": 1.3387500047683716,
      "learning_rate": 0.0002799038105676658,
      "loss": 0.3363,
      "step": 30
    },
    {
      "epoch": 0.4794586756887385,
      "grad_norm": 0.7546955943107605,
      "learning_rate": 0.0002778960246531138,
      "loss": 0.3319,
      "step": 31
    },
    {
      "epoch": 0.49492508458192364,
      "grad_norm": 0.5383890271186829,
      "learning_rate": 0.0002758005851918136,
      "loss": 0.4198,
      "step": 32
    },
    {
      "epoch": 0.5103914934751087,
      "grad_norm": 2.9426941871643066,
      "learning_rate": 0.0002736189282933023,
      "loss": 0.3846,
      "step": 33
    },
    {
      "epoch": 0.5258579023682939,
      "grad_norm": 1.3911627531051636,
      "learning_rate": 0.0002713525491562421,
      "loss": 0.2681,
      "step": 34
    },
    {
      "epoch": 0.541324311261479,
      "grad_norm": 0.33964619040489197,
      "learning_rate": 0.00026900300104368524,
      "loss": 0.2078,
      "step": 35
    },
    {
      "epoch": 0.5567907201546641,
      "grad_norm": 0.6098275780677795,
      "learning_rate": 0.0002665718942185456,
      "loss": 0.2027,
      "step": 36
    },
    {
      "epoch": 0.5722571290478492,
      "grad_norm": 0.5025777816772461,
      "learning_rate": 0.00026406089484000466,
      "loss": 0.206,
      "step": 37
    },
    {
      "epoch": 0.5877235379410343,
      "grad_norm": 0.4298511743545532,
      "learning_rate": 0.00026147172382160914,
      "loss": 0.2803,
      "step": 38
    },
    {
      "epoch": 0.6031899468342194,
      "grad_norm": 0.9432958364486694,
      "learning_rate": 0.00025880615565184313,
      "loss": 0.3849,
      "step": 39
    },
    {
      "epoch": 0.6186563557274045,
      "grad_norm": 0.6062995791435242,
      "learning_rate": 0.00025606601717798207,
      "loss": 0.3508,
      "step": 40
    },
    {
      "epoch": 0.6341227646205897,
      "grad_norm": 0.5804493427276611,
      "learning_rate": 0.0002532531863540631,
      "loss": 0.3212,
      "step": 41
    },
    {
      "epoch": 0.6495891735137748,
      "grad_norm": 0.5816200375556946,
      "learning_rate": 0.0002503695909538287,
      "loss": 0.2876,
      "step": 42
    },
    {
      "epoch": 0.6650555824069598,
      "grad_norm": 0.4420316219329834,
      "learning_rate": 0.0002474172072495275,
      "loss": 0.3054,
      "step": 43
    },
    {
      "epoch": 0.680521991300145,
      "grad_norm": 0.45776253938674927,
      "learning_rate": 0.0002443980586574756,
      "loss": 0.279,
      "step": 44
    },
    {
      "epoch": 0.6959884001933301,
      "grad_norm": 0.7748484015464783,
      "learning_rate": 0.00024131421435130807,
      "loss": 0.2875,
      "step": 45
    },
    {
      "epoch": 0.7114548090865153,
      "grad_norm": 0.5803005695343018,
      "learning_rate": 0.00023816778784387094,
      "loss": 0.3088,
      "step": 46
    },
    {
      "epoch": 0.7269212179797003,
      "grad_norm": 1.1293902397155762,
      "learning_rate": 0.0002349609355387249,
      "loss": 0.3188,
      "step": 47
    },
    {
      "epoch": 0.7423876268728854,
      "grad_norm": 1.2384657859802246,
      "learning_rate": 0.00023169585525225405,
      "loss": 0.4065,
      "step": 48
    },
    {
      "epoch": 0.7578540357660706,
      "grad_norm": 1.8807259798049927,
      "learning_rate": 0.0002283747847073923,
      "loss": 0.2961,
      "step": 49
    },
    {
      "epoch": 0.7733204446592556,
      "grad_norm": 0.7909408211708069,
      "learning_rate": 0.000225,
      "loss": 0.2371,
      "step": 50
    },
    {
      "epoch": 0.7733204446592556,
      "eval_loss": 0.26686012744903564,
      "eval_runtime": 7.3174,
      "eval_samples_per_second": 6.833,
      "eval_steps_per_second": 6.833,
      "step": 50
    }
  ],
  "logging_steps": 1,
  "max_steps": 130,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 1,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 7.89733217355694e+16,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}