{
  "best_metric": 0.47569820284843445,
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
  "epoch": 2.73972602739726,
  "eval_steps": 25,
  "global_step": 50,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0547945205479452,
      "grad_norm": 2.1911723613739014,
      "learning_rate": 5e-05,
      "loss": 2.1243,
      "step": 1
    },
    {
      "epoch": 0.0547945205479452,
      "eval_loss": 2.322556972503662,
      "eval_runtime": 1.4644,
      "eval_samples_per_second": 83.993,
      "eval_steps_per_second": 10.926,
      "step": 1
    },
    {
      "epoch": 0.1095890410958904,
      "grad_norm": 2.2915894985198975,
      "learning_rate": 0.0001,
      "loss": 2.2102,
      "step": 2
    },
    {
      "epoch": 0.1643835616438356,
      "grad_norm": 2.287838935852051,
      "learning_rate": 9.989294616193017e-05,
      "loss": 2.1496,
      "step": 3
    },
    {
      "epoch": 0.2191780821917808,
      "grad_norm": 2.2436702251434326,
      "learning_rate": 9.957224306869053e-05,
      "loss": 1.8866,
      "step": 4
    },
    {
      "epoch": 0.273972602739726,
      "grad_norm": 2.225752592086792,
      "learning_rate": 9.903926402016153e-05,
      "loss": 1.474,
      "step": 5
    },
    {
      "epoch": 0.3287671232876712,
      "grad_norm": 2.5399177074432373,
      "learning_rate": 9.829629131445342e-05,
      "loss": 1.1843,
      "step": 6
    },
    {
      "epoch": 0.3835616438356164,
      "grad_norm": 2.1273326873779297,
      "learning_rate": 9.73465064747553e-05,
      "loss": 0.9053,
      "step": 7
    },
    {
      "epoch": 0.4383561643835616,
      "grad_norm": 1.2077769041061401,
      "learning_rate": 9.619397662556435e-05,
      "loss": 0.7413,
      "step": 8
    },
    {
      "epoch": 0.4931506849315068,
      "grad_norm": 0.8515187501907349,
      "learning_rate": 9.484363707663442e-05,
      "loss": 0.7253,
      "step": 9
    },
    {
      "epoch": 0.547945205479452,
      "grad_norm": 0.6145463585853577,
      "learning_rate": 9.330127018922194e-05,
      "loss": 0.5881,
      "step": 10
    },
    {
      "epoch": 0.6027397260273972,
      "grad_norm": 0.6945990920066833,
      "learning_rate": 9.157348061512727e-05,
      "loss": 0.6293,
      "step": 11
    },
    {
      "epoch": 0.6575342465753424,
      "grad_norm": 0.6183176636695862,
      "learning_rate": 8.966766701456177e-05,
      "loss": 0.6014,
      "step": 12
    },
    {
      "epoch": 0.7123287671232876,
      "grad_norm": 0.7185230255126953,
      "learning_rate": 8.759199037394887e-05,
      "loss": 0.5301,
      "step": 13
    },
    {
      "epoch": 0.7671232876712328,
      "grad_norm": 0.587353527545929,
      "learning_rate": 8.535533905932738e-05,
      "loss": 0.5003,
      "step": 14
    },
    {
      "epoch": 0.821917808219178,
      "grad_norm": 0.5427547097206116,
      "learning_rate": 8.296729075500344e-05,
      "loss": 0.551,
      "step": 15
    },
    {
      "epoch": 0.8767123287671232,
      "grad_norm": 0.4918208420276642,
      "learning_rate": 8.043807145043604e-05,
      "loss": 0.4888,
      "step": 16
    },
    {
      "epoch": 0.9315068493150684,
      "grad_norm": 0.5143556594848633,
      "learning_rate": 7.777851165098012e-05,
      "loss": 0.6516,
      "step": 17
    },
    {
      "epoch": 0.9863013698630136,
      "grad_norm": 0.5198273062705994,
      "learning_rate": 7.500000000000001e-05,
      "loss": 0.5301,
      "step": 18
    },
    {
      "epoch": 1.0410958904109588,
      "grad_norm": 0.7991119623184204,
      "learning_rate": 7.211443451095007e-05,
      "loss": 0.8473,
      "step": 19
    },
    {
      "epoch": 1.095890410958904,
      "grad_norm": 0.44308751821517944,
      "learning_rate": 6.91341716182545e-05,
      "loss": 0.4874,
      "step": 20
    },
    {
      "epoch": 1.1506849315068493,
      "grad_norm": 0.3959720730781555,
      "learning_rate": 6.607197326515808e-05,
      "loss": 0.5215,
      "step": 21
    },
    {
      "epoch": 1.2054794520547945,
      "grad_norm": 0.4689987897872925,
      "learning_rate": 6.294095225512603e-05,
      "loss": 0.5615,
      "step": 22
    },
    {
      "epoch": 1.2602739726027397,
      "grad_norm": 0.41961339116096497,
      "learning_rate": 5.9754516100806423e-05,
      "loss": 0.4448,
      "step": 23
    },
    {
      "epoch": 1.3150684931506849,
      "grad_norm": 0.4268343448638916,
      "learning_rate": 5.6526309611002594e-05,
      "loss": 0.4324,
      "step": 24
    },
    {
      "epoch": 1.36986301369863,
      "grad_norm": 0.38932234048843384,
      "learning_rate": 5.327015646150716e-05,
      "loss": 0.4158,
      "step": 25
    },
    {
      "epoch": 1.36986301369863,
      "eval_loss": 0.5110846757888794,
      "eval_runtime": 1.4625,
      "eval_samples_per_second": 84.104,
      "eval_steps_per_second": 10.94,
      "step": 25
    },
    {
      "epoch": 1.4246575342465753,
      "grad_norm": 0.41463303565979004,
      "learning_rate": 5e-05,
      "loss": 0.4533,
      "step": 26
    },
    {
      "epoch": 1.4794520547945205,
      "grad_norm": 0.38293758034706116,
      "learning_rate": 4.6729843538492847e-05,
      "loss": 0.4532,
      "step": 27
    },
    {
      "epoch": 1.5342465753424657,
      "grad_norm": 0.3628367483615875,
      "learning_rate": 4.347369038899744e-05,
      "loss": 0.4094,
      "step": 28
    },
    {
      "epoch": 1.589041095890411,
      "grad_norm": 0.42759907245635986,
      "learning_rate": 4.0245483899193595e-05,
      "loss": 0.4622,
      "step": 29
    },
    {
      "epoch": 1.643835616438356,
      "grad_norm": 0.36563095450401306,
      "learning_rate": 3.705904774487396e-05,
      "loss": 0.4902,
      "step": 30
    },
    {
      "epoch": 1.6986301369863015,
      "grad_norm": 0.4586554765701294,
      "learning_rate": 3.392802673484193e-05,
      "loss": 0.4133,
      "step": 31
    },
    {
      "epoch": 1.7534246575342465,
      "grad_norm": 0.32642537355422974,
      "learning_rate": 3.086582838174551e-05,
      "loss": 0.3652,
      "step": 32
    },
    {
      "epoch": 1.808219178082192,
      "grad_norm": 0.3642094135284424,
      "learning_rate": 2.7885565489049946e-05,
      "loss": 0.3965,
      "step": 33
    },
    {
      "epoch": 1.8630136986301369,
      "grad_norm": 0.36974775791168213,
      "learning_rate": 2.500000000000001e-05,
      "loss": 0.4403,
      "step": 34
    },
    {
      "epoch": 1.9178082191780823,
      "grad_norm": 0.37709006667137146,
      "learning_rate": 2.2221488349019903e-05,
      "loss": 0.4803,
      "step": 35
    },
    {
      "epoch": 1.9726027397260273,
      "grad_norm": 0.4891318380832672,
      "learning_rate": 1.9561928549563968e-05,
      "loss": 0.5114,
      "step": 36
    },
    {
      "epoch": 2.0273972602739727,
      "grad_norm": 0.8724299073219299,
      "learning_rate": 1.703270924499656e-05,
      "loss": 0.7102,
      "step": 37
    },
    {
      "epoch": 2.0821917808219177,
      "grad_norm": 0.4001636207103729,
      "learning_rate": 1.4644660940672627e-05,
      "loss": 0.4218,
      "step": 38
    },
    {
      "epoch": 2.136986301369863,
      "grad_norm": 0.3491167426109314,
      "learning_rate": 1.2408009626051137e-05,
      "loss": 0.4221,
      "step": 39
    },
    {
      "epoch": 2.191780821917808,
      "grad_norm": 0.37197569012641907,
      "learning_rate": 1.0332332985438248e-05,
      "loss": 0.4686,
      "step": 40
    },
    {
      "epoch": 2.2465753424657535,
      "grad_norm": 0.4310151934623718,
      "learning_rate": 8.426519384872733e-06,
      "loss": 0.3679,
      "step": 41
    },
    {
      "epoch": 2.3013698630136985,
      "grad_norm": 0.396329790353775,
      "learning_rate": 6.698729810778065e-06,
      "loss": 0.4621,
      "step": 42
    },
    {
      "epoch": 2.356164383561644,
      "grad_norm": 0.35895678400993347,
      "learning_rate": 5.156362923365588e-06,
      "loss": 0.4256,
      "step": 43
    },
    {
      "epoch": 2.410958904109589,
      "grad_norm": 0.3557685911655426,
      "learning_rate": 3.8060233744356633e-06,
      "loss": 0.4232,
      "step": 44
    },
    {
      "epoch": 2.4657534246575343,
      "grad_norm": 0.3738231658935547,
      "learning_rate": 2.653493525244721e-06,
      "loss": 0.4006,
      "step": 45
    },
    {
      "epoch": 2.5205479452054793,
      "grad_norm": 0.36462685465812683,
      "learning_rate": 1.70370868554659e-06,
      "loss": 0.395,
      "step": 46
    },
    {
      "epoch": 2.5753424657534247,
      "grad_norm": 0.3373780846595764,
      "learning_rate": 9.607359798384785e-07,
      "loss": 0.3644,
      "step": 47
    },
    {
      "epoch": 2.6301369863013697,
      "grad_norm": 0.3781304955482483,
      "learning_rate": 4.277569313094809e-07,
      "loss": 0.4711,
      "step": 48
    },
    {
      "epoch": 2.684931506849315,
      "grad_norm": 0.3349582254886627,
      "learning_rate": 1.0705383806982606e-07,
      "loss": 0.4397,
      "step": 49
    },
    {
      "epoch": 2.73972602739726,
      "grad_norm": 0.3511578440666199,
      "learning_rate": 0.0,
      "loss": 0.415,
      "step": 50
    },
    {
      "epoch": 2.73972602739726,
      "eval_loss": 0.47569820284843445,
      "eval_runtime": 1.4632,
      "eval_samples_per_second": 84.064,
      "eval_steps_per_second": 10.935,
      "step": 50
    }
  ],
  "logging_steps": 1,
  "max_steps": 50,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 25,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 1,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.059536353886208e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}