|
{ |
|
"best_metric": 1.475726842880249, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-50", |
|
"epoch": 0.01536216299254935, |
|
"eval_steps": 50, |
|
"global_step": 50, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00030724325985098704, |
|
"grad_norm": 23.011367797851562, |
|
"learning_rate": 5e-06, |
|
"loss": 6.3985, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00030724325985098704, |
|
"eval_loss": 2.409876585006714, |
|
"eval_runtime": 441.0598, |
|
"eval_samples_per_second": 12.429, |
|
"eval_steps_per_second": 6.215, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0006144865197019741, |
|
"grad_norm": 32.28715515136719, |
|
"learning_rate": 1e-05, |
|
"loss": 7.734, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0009217297795529611, |
|
"grad_norm": 28.548389434814453, |
|
"learning_rate": 1.5e-05, |
|
"loss": 7.4653, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0012289730394039482, |
|
"grad_norm": 24.919015884399414, |
|
"learning_rate": 2e-05, |
|
"loss": 7.3706, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.001536216299254935, |
|
"grad_norm": 22.995384216308594, |
|
"learning_rate": 2.5e-05, |
|
"loss": 6.938, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0018434595591059221, |
|
"grad_norm": 19.593135833740234, |
|
"learning_rate": 3e-05, |
|
"loss": 7.122, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0021507028189569092, |
|
"grad_norm": 18.867374420166016, |
|
"learning_rate": 3.5e-05, |
|
"loss": 5.8915, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0024579460788078963, |
|
"grad_norm": 15.144004821777344, |
|
"learning_rate": 4e-05, |
|
"loss": 5.7937, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.002765189338658883, |
|
"grad_norm": 13.516443252563477, |
|
"learning_rate": 4.5e-05, |
|
"loss": 5.6253, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.00307243259850987, |
|
"grad_norm": 14.180933952331543, |
|
"learning_rate": 5e-05, |
|
"loss": 5.3894, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.003379675858360857, |
|
"grad_norm": 16.558490753173828, |
|
"learning_rate": 5.500000000000001e-05, |
|
"loss": 4.628, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.0036869191182118443, |
|
"grad_norm": 14.397795677185059, |
|
"learning_rate": 6e-05, |
|
"loss": 5.3816, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.003994162378062831, |
|
"grad_norm": 15.309121131896973, |
|
"learning_rate": 6.500000000000001e-05, |
|
"loss": 4.9166, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.0043014056379138185, |
|
"grad_norm": 15.53266716003418, |
|
"learning_rate": 7e-05, |
|
"loss": 4.5761, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.004608648897764805, |
|
"grad_norm": 13.632649421691895, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 3.6609, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.004915892157615793, |
|
"grad_norm": 13.333073616027832, |
|
"learning_rate": 8e-05, |
|
"loss": 4.0839, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.005223135417466779, |
|
"grad_norm": 14.622278213500977, |
|
"learning_rate": 8.5e-05, |
|
"loss": 4.4781, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.005530378677317766, |
|
"grad_norm": 12.793045997619629, |
|
"learning_rate": 9e-05, |
|
"loss": 4.9027, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0058376219371687535, |
|
"grad_norm": 11.85013484954834, |
|
"learning_rate": 9.5e-05, |
|
"loss": 3.8893, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.00614486519701974, |
|
"grad_norm": 13.697391510009766, |
|
"learning_rate": 0.0001, |
|
"loss": 4.6765, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.006452108456870728, |
|
"grad_norm": 10.900583267211914, |
|
"learning_rate": 9.999238475781957e-05, |
|
"loss": 3.559, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.006759351716721714, |
|
"grad_norm": 15.428689002990723, |
|
"learning_rate": 9.99695413509548e-05, |
|
"loss": 4.3751, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.007066594976572701, |
|
"grad_norm": 11.113511085510254, |
|
"learning_rate": 9.99314767377287e-05, |
|
"loss": 4.1964, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.0073738382364236885, |
|
"grad_norm": 12.102518081665039, |
|
"learning_rate": 9.987820251299122e-05, |
|
"loss": 4.4795, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.007681081496274675, |
|
"grad_norm": 11.722882270812988, |
|
"learning_rate": 9.980973490458728e-05, |
|
"loss": 4.2897, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.007988324756125662, |
|
"grad_norm": 11.406682968139648, |
|
"learning_rate": 9.972609476841367e-05, |
|
"loss": 5.0885, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.00829556801597665, |
|
"grad_norm": 11.243815422058105, |
|
"learning_rate": 9.962730758206611e-05, |
|
"loss": 4.4903, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.008602811275827637, |
|
"grad_norm": 11.628358840942383, |
|
"learning_rate": 9.951340343707852e-05, |
|
"loss": 4.4378, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.008910054535678624, |
|
"grad_norm": 10.681234359741211, |
|
"learning_rate": 9.938441702975689e-05, |
|
"loss": 4.691, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.00921729779552961, |
|
"grad_norm": 10.783159255981445, |
|
"learning_rate": 9.924038765061042e-05, |
|
"loss": 4.9312, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.009524541055380597, |
|
"grad_norm": 10.892800331115723, |
|
"learning_rate": 9.908135917238321e-05, |
|
"loss": 5.2761, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.009831784315231585, |
|
"grad_norm": 11.34912109375, |
|
"learning_rate": 9.890738003669029e-05, |
|
"loss": 5.4007, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.010139027575082572, |
|
"grad_norm": 13.050414085388184, |
|
"learning_rate": 9.871850323926177e-05, |
|
"loss": 5.5961, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.010446270834933559, |
|
"grad_norm": 11.140335083007812, |
|
"learning_rate": 9.851478631379982e-05, |
|
"loss": 4.5804, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.010753514094784545, |
|
"grad_norm": 11.838504791259766, |
|
"learning_rate": 9.829629131445342e-05, |
|
"loss": 5.553, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.011060757354635532, |
|
"grad_norm": 12.498956680297852, |
|
"learning_rate": 9.806308479691595e-05, |
|
"loss": 4.8211, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.01136800061448652, |
|
"grad_norm": 12.233826637268066, |
|
"learning_rate": 9.781523779815179e-05, |
|
"loss": 5.3758, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.011675243874337507, |
|
"grad_norm": 12.875245094299316, |
|
"learning_rate": 9.755282581475769e-05, |
|
"loss": 5.2887, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.011982487134188494, |
|
"grad_norm": 14.482268333435059, |
|
"learning_rate": 9.727592877996585e-05, |
|
"loss": 5.3555, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.01228973039403948, |
|
"grad_norm": 12.469817161560059, |
|
"learning_rate": 9.698463103929542e-05, |
|
"loss": 5.1884, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.012596973653890467, |
|
"grad_norm": 14.905447959899902, |
|
"learning_rate": 9.667902132486009e-05, |
|
"loss": 5.7557, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.012904216913741455, |
|
"grad_norm": 12.736101150512695, |
|
"learning_rate": 9.635919272833938e-05, |
|
"loss": 4.3636, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.013211460173592442, |
|
"grad_norm": 13.541585922241211, |
|
"learning_rate": 9.602524267262203e-05, |
|
"loss": 5.2159, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.013518703433443429, |
|
"grad_norm": 14.56790542602539, |
|
"learning_rate": 9.567727288213005e-05, |
|
"loss": 6.1784, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.013825946693294415, |
|
"grad_norm": 14.048850059509277, |
|
"learning_rate": 9.53153893518325e-05, |
|
"loss": 5.7569, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.014133189953145402, |
|
"grad_norm": 14.115535736083984, |
|
"learning_rate": 9.493970231495835e-05, |
|
"loss": 5.7633, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.01444043321299639, |
|
"grad_norm": 13.446468353271484, |
|
"learning_rate": 9.45503262094184e-05, |
|
"loss": 5.7742, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.014747676472847377, |
|
"grad_norm": 15.913680076599121, |
|
"learning_rate": 9.414737964294636e-05, |
|
"loss": 5.6512, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.015054919732698364, |
|
"grad_norm": 13.415315628051758, |
|
"learning_rate": 9.373098535696979e-05, |
|
"loss": 4.9549, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.01536216299254935, |
|
"grad_norm": 22.350265502929688, |
|
"learning_rate": 9.330127018922194e-05, |
|
"loss": 6.6309, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01536216299254935, |
|
"eval_loss": 1.475726842880249, |
|
"eval_runtime": 443.646, |
|
"eval_samples_per_second": 12.357, |
|
"eval_steps_per_second": 6.178, |
|
"step": 50 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.19077308849193e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|