|
{ |
|
"best_metric": 0.3283706307411194, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-50", |
|
"epoch": 0.1498618461106168, |
|
"eval_steps": 25, |
|
"global_step": 50, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0029972369222123355, |
|
"grad_norm": 7.602510929107666, |
|
"learning_rate": 1.4999999999999999e-05, |
|
"loss": 7.3791, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0029972369222123355, |
|
"eval_loss": 5.100011348724365, |
|
"eval_runtime": 0.2905, |
|
"eval_samples_per_second": 172.095, |
|
"eval_steps_per_second": 44.745, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.005994473844424671, |
|
"grad_norm": 5.811465263366699, |
|
"learning_rate": 2.9999999999999997e-05, |
|
"loss": 5.2052, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.008991710766637007, |
|
"grad_norm": 6.1456217765808105, |
|
"learning_rate": 4.4999999999999996e-05, |
|
"loss": 5.0327, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.011988947688849342, |
|
"grad_norm": 5.857211112976074, |
|
"learning_rate": 5.9999999999999995e-05, |
|
"loss": 4.9457, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.014986184611061678, |
|
"grad_norm": 5.512299537658691, |
|
"learning_rate": 7.5e-05, |
|
"loss": 4.8063, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.017983421533274015, |
|
"grad_norm": 5.282471179962158, |
|
"learning_rate": 8.999999999999999e-05, |
|
"loss": 4.7355, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.02098065845548635, |
|
"grad_norm": 4.267989158630371, |
|
"learning_rate": 0.00010499999999999999, |
|
"loss": 4.4458, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.023977895377698684, |
|
"grad_norm": 3.799853801727295, |
|
"learning_rate": 0.00011999999999999999, |
|
"loss": 4.2302, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.02697513229991102, |
|
"grad_norm": 3.506157875061035, |
|
"learning_rate": 0.000135, |
|
"loss": 3.857, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.029972369222123357, |
|
"grad_norm": 3.8407812118530273, |
|
"learning_rate": 0.00015, |
|
"loss": 3.749, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03296960614433569, |
|
"grad_norm": 4.470568656921387, |
|
"learning_rate": 0.000165, |
|
"loss": 3.4078, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.03596684306654803, |
|
"grad_norm": 4.822042465209961, |
|
"learning_rate": 0.00017999999999999998, |
|
"loss": 3.0358, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.038964079988760364, |
|
"grad_norm": 5.060558795928955, |
|
"learning_rate": 0.000195, |
|
"loss": 4.5688, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.0419613169109727, |
|
"grad_norm": 4.888822078704834, |
|
"learning_rate": 0.00020999999999999998, |
|
"loss": 3.6099, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.04495855383318503, |
|
"grad_norm": 3.4810433387756348, |
|
"learning_rate": 0.000225, |
|
"loss": 2.4291, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04795579075539737, |
|
"grad_norm": 3.577241897583008, |
|
"learning_rate": 0.00023999999999999998, |
|
"loss": 2.1686, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.0509530276776097, |
|
"grad_norm": 3.595766544342041, |
|
"learning_rate": 0.00025499999999999996, |
|
"loss": 1.9973, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.05395026459982204, |
|
"grad_norm": 4.025913715362549, |
|
"learning_rate": 0.00027, |
|
"loss": 1.9723, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.05694750152203437, |
|
"grad_norm": 2.864335536956787, |
|
"learning_rate": 0.000285, |
|
"loss": 1.9312, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.05994473844424671, |
|
"grad_norm": 3.9801218509674072, |
|
"learning_rate": 0.0003, |
|
"loss": 1.8875, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06294197536645904, |
|
"grad_norm": 3.2217376232147217, |
|
"learning_rate": 0.00029999841345404617, |
|
"loss": 1.5763, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.06593921228867138, |
|
"grad_norm": 19.193851470947266, |
|
"learning_rate": 0.0002999936538534755, |
|
"loss": 1.8067, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.06893644921088371, |
|
"grad_norm": 25.025583267211914, |
|
"learning_rate": 0.0002999857213101595, |
|
"loss": 1.4576, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.07193368613309606, |
|
"grad_norm": 4.855524063110352, |
|
"learning_rate": 0.00029997461601054764, |
|
"loss": 1.0614, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.0749309230553084, |
|
"grad_norm": 3.3954389095306396, |
|
"learning_rate": 0.00029996033821566326, |
|
"loss": 0.8964, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.0749309230553084, |
|
"eval_loss": 1.0534682273864746, |
|
"eval_runtime": 0.2881, |
|
"eval_samples_per_second": 173.557, |
|
"eval_steps_per_second": 45.125, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.07792815997752073, |
|
"grad_norm": 12.105545997619629, |
|
"learning_rate": 0.0002999428882610971, |
|
"loss": 3.0311, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.08092539689973306, |
|
"grad_norm": 3.334498882293701, |
|
"learning_rate": 0.00029992226655699945, |
|
"loss": 1.2946, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.0839226338219454, |
|
"grad_norm": 2.8498733043670654, |
|
"learning_rate": 0.00029989847358807104, |
|
"loss": 1.1872, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.08691987074415773, |
|
"grad_norm": 2.367673873901367, |
|
"learning_rate": 0.0002998715099135508, |
|
"loss": 1.0292, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.08991710766637007, |
|
"grad_norm": 8.79304313659668, |
|
"learning_rate": 0.00029984137616720325, |
|
"loss": 1.2032, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0929143445885824, |
|
"grad_norm": 8.142653465270996, |
|
"learning_rate": 0.00029980807305730374, |
|
"loss": 1.2389, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.09591158151079474, |
|
"grad_norm": 4.5038628578186035, |
|
"learning_rate": 0.0002997716013666212, |
|
"loss": 1.0933, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.09890881843300707, |
|
"grad_norm": 2.439879894256592, |
|
"learning_rate": 0.0002997319619524003, |
|
"loss": 0.922, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.1019060553552194, |
|
"grad_norm": 2.002732992172241, |
|
"learning_rate": 0.0002996891557463412, |
|
"loss": 0.8141, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.10490329227743174, |
|
"grad_norm": 1.9198921918869019, |
|
"learning_rate": 0.00029964318375457725, |
|
"loss": 0.5868, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.10790052919964407, |
|
"grad_norm": 1.1390972137451172, |
|
"learning_rate": 0.00029959404705765186, |
|
"loss": 0.4158, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.11089776612185641, |
|
"grad_norm": 0.9061455130577087, |
|
"learning_rate": 0.00029954174681049296, |
|
"loss": 0.3104, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.11389500304406874, |
|
"grad_norm": 3.6855151653289795, |
|
"learning_rate": 0.0002994862842423856, |
|
"loss": 1.3318, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.11689223996628108, |
|
"grad_norm": 2.992558717727661, |
|
"learning_rate": 0.00029942766065694333, |
|
"loss": 1.0331, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.11988947688849343, |
|
"grad_norm": 2.762493133544922, |
|
"learning_rate": 0.00029936587743207736, |
|
"loss": 0.7132, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.12288671381070576, |
|
"grad_norm": 2.2291383743286133, |
|
"learning_rate": 0.00029930093601996446, |
|
"loss": 0.5774, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.12588395073291808, |
|
"grad_norm": 1.1489959955215454, |
|
"learning_rate": 0.0002992328379470125, |
|
"loss": 0.5567, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.12888118765513043, |
|
"grad_norm": 6.168476104736328, |
|
"learning_rate": 0.00029916158481382474, |
|
"loss": 0.6131, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.13187842457734275, |
|
"grad_norm": 5.408211708068848, |
|
"learning_rate": 0.0002990871782951623, |
|
"loss": 0.6638, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.1348756614995551, |
|
"grad_norm": 1.5306873321533203, |
|
"learning_rate": 0.0002990096201399045, |
|
"loss": 0.5488, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.13787289842176742, |
|
"grad_norm": 1.8623676300048828, |
|
"learning_rate": 0.00029892891217100817, |
|
"loss": 0.4659, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.14087013534397977, |
|
"grad_norm": 1.6640721559524536, |
|
"learning_rate": 0.0002988450562854644, |
|
"loss": 0.4368, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.14386737226619212, |
|
"grad_norm": 0.3559802174568176, |
|
"learning_rate": 0.0002987580544542541, |
|
"loss": 0.0507, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.14686460918840444, |
|
"grad_norm": 0.4540441334247589, |
|
"learning_rate": 0.0002986679087223018, |
|
"loss": 0.087, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.1498618461106168, |
|
"grad_norm": 0.8076233267784119, |
|
"learning_rate": 0.00029857462120842744, |
|
"loss": 0.1023, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.1498618461106168, |
|
"eval_loss": 0.3283706307411194, |
|
"eval_runtime": 0.2882, |
|
"eval_samples_per_second": 173.489, |
|
"eval_steps_per_second": 45.107, |
|
"step": 50 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 668, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 1, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.296200128626688e+16, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|