|
{ |
|
"best_metric": 0.06158732250332832, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-150", |
|
"epoch": 1.4328358208955223, |
|
"eval_steps": 25, |
|
"global_step": 150, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00955223880597015, |
|
"grad_norm": 29.604724884033203, |
|
"learning_rate": 2.9999999999999997e-05, |
|
"loss": 11.1334, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00955223880597015, |
|
"eval_loss": 10.308030128479004, |
|
"eval_runtime": 7.207, |
|
"eval_samples_per_second": 6.938, |
|
"eval_steps_per_second": 6.938, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0191044776119403, |
|
"grad_norm": 37.691951751708984, |
|
"learning_rate": 5.9999999999999995e-05, |
|
"loss": 10.295, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.028656716417910448, |
|
"grad_norm": 40.03203582763672, |
|
"learning_rate": 8.999999999999999e-05, |
|
"loss": 9.8773, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0382089552238806, |
|
"grad_norm": 37.06792068481445, |
|
"learning_rate": 0.00011999999999999999, |
|
"loss": 7.4423, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.04776119402985075, |
|
"grad_norm": 25.62007713317871, |
|
"learning_rate": 0.00015, |
|
"loss": 4.1197, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.057313432835820896, |
|
"grad_norm": 20.416719436645508, |
|
"learning_rate": 0.00017999999999999998, |
|
"loss": 2.4344, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.06686567164179104, |
|
"grad_norm": 13.990738868713379, |
|
"learning_rate": 0.00020999999999999998, |
|
"loss": 0.6984, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0764179104477612, |
|
"grad_norm": 26.440662384033203, |
|
"learning_rate": 0.00023999999999999998, |
|
"loss": 0.6423, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.08597014925373134, |
|
"grad_norm": 1.5983080863952637, |
|
"learning_rate": 0.00027, |
|
"loss": 0.0198, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0955223880597015, |
|
"grad_norm": 0.14272946119308472, |
|
"learning_rate": 0.0003, |
|
"loss": 0.0022, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.10507462686567164, |
|
"grad_norm": 3.5088140964508057, |
|
"learning_rate": 0.0002999794957488703, |
|
"loss": 0.0129, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.11462686567164179, |
|
"grad_norm": 0.5191304087638855, |
|
"learning_rate": 0.0002999179886011389, |
|
"loss": 0.0023, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.12417910447761193, |
|
"grad_norm": 0.00615344662219286, |
|
"learning_rate": 0.0002998154953722457, |
|
"loss": 0.0002, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.13373134328358208, |
|
"grad_norm": 0.007408325094729662, |
|
"learning_rate": 0.00029967204408281613, |
|
"loss": 0.0002, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.14328358208955225, |
|
"grad_norm": 0.013087390922009945, |
|
"learning_rate": 0.00029948767395100045, |
|
"loss": 0.0004, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.1528358208955224, |
|
"grad_norm": 0.020046714693307877, |
|
"learning_rate": 0.0002992624353817517, |
|
"loss": 0.0005, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.16238805970149253, |
|
"grad_norm": 4.9524407386779785, |
|
"learning_rate": 0.0002989963899530457, |
|
"loss": 0.0049, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.17194029850746267, |
|
"grad_norm": 0.02008659392595291, |
|
"learning_rate": 0.00029868961039904624, |
|
"loss": 0.0005, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.18149253731343284, |
|
"grad_norm": 0.022405585274100304, |
|
"learning_rate": 0.00029834218059022024, |
|
"loss": 0.0005, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.191044776119403, |
|
"grad_norm": 0.015606110915541649, |
|
"learning_rate": 0.00029795419551040833, |
|
"loss": 0.0004, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.20059701492537313, |
|
"grad_norm": 0.014573603868484497, |
|
"learning_rate": 0.00029752576123085736, |
|
"loss": 0.0004, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.21014925373134327, |
|
"grad_norm": 0.05520666390657425, |
|
"learning_rate": 0.0002970569948812214, |
|
"loss": 0.0005, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.21970149253731344, |
|
"grad_norm": 0.015050092712044716, |
|
"learning_rate": 0.0002965480246175399, |
|
"loss": 0.0003, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.22925373134328358, |
|
"grad_norm": 0.017276253551244736, |
|
"learning_rate": 0.0002959989895872009, |
|
"loss": 0.0003, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.23880597014925373, |
|
"grad_norm": 0.01385075319558382, |
|
"learning_rate": 0.0002954100398908995, |
|
"loss": 0.0002, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.23880597014925373, |
|
"eval_loss": 0.5136697292327881, |
|
"eval_runtime": 7.3698, |
|
"eval_samples_per_second": 6.784, |
|
"eval_steps_per_second": 6.784, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.24835820895522387, |
|
"grad_norm": 0.003815986216068268, |
|
"learning_rate": 0.0002947813365416023, |
|
"loss": 0.0001, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.25791044776119404, |
|
"grad_norm": 132.7102813720703, |
|
"learning_rate": 0.0002941130514205272, |
|
"loss": 10.3943, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.26746268656716415, |
|
"grad_norm": 1.9110982418060303, |
|
"learning_rate": 0.0002934053672301536, |
|
"loss": 0.2244, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.2770149253731343, |
|
"grad_norm": 2.235290288925171, |
|
"learning_rate": 0.00029265847744427303, |
|
"loss": 0.0308, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.2865671641791045, |
|
"grad_norm": 0.061887867748737335, |
|
"learning_rate": 0.00029187258625509513, |
|
"loss": 0.0014, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.2961194029850746, |
|
"grad_norm": 0.09402349591255188, |
|
"learning_rate": 0.00029104790851742417, |
|
"loss": 0.0016, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.3056716417910448, |
|
"grad_norm": 0.2571331858634949, |
|
"learning_rate": 0.0002901846696899191, |
|
"loss": 0.0029, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.31522388059701495, |
|
"grad_norm": 0.0307838823646307, |
|
"learning_rate": 0.00028928310577345606, |
|
"loss": 0.0005, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.32477611940298506, |
|
"grad_norm": 0.016969487071037292, |
|
"learning_rate": 0.0002883434632466077, |
|
"loss": 0.0003, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.33432835820895523, |
|
"grad_norm": 0.015743907541036606, |
|
"learning_rate": 0.00028736599899825856, |
|
"loss": 0.0002, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.34388059701492535, |
|
"grad_norm": 0.010241161100566387, |
|
"learning_rate": 0.00028635098025737434, |
|
"loss": 0.0002, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.3534328358208955, |
|
"grad_norm": 0.007466515991836786, |
|
"learning_rate": 0.00028529868451994384, |
|
"loss": 0.0002, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.3629850746268657, |
|
"grad_norm": 0.005754731595516205, |
|
"learning_rate": 0.0002842093994731145, |
|
"loss": 0.0002, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.3725373134328358, |
|
"grad_norm": 0.004388387314975262, |
|
"learning_rate": 0.00028308342291654174, |
|
"loss": 0.0001, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.382089552238806, |
|
"grad_norm": 0.0040399739518761635, |
|
"learning_rate": 0.00028192106268097334, |
|
"loss": 0.0001, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.39164179104477614, |
|
"grad_norm": 0.003504898166283965, |
|
"learning_rate": 0.00028072263654409154, |
|
"loss": 0.0001, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.40119402985074626, |
|
"grad_norm": 0.00304769491776824, |
|
"learning_rate": 0.0002794884721436361, |
|
"loss": 0.0001, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.41074626865671643, |
|
"grad_norm": 0.0027260910719633102, |
|
"learning_rate": 0.00027821890688783083, |
|
"loss": 0.0001, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.42029850746268654, |
|
"grad_norm": 0.0024885141756385565, |
|
"learning_rate": 0.0002769142878631403, |
|
"loss": 0.0001, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.4298507462686567, |
|
"grad_norm": 0.002281956607475877, |
|
"learning_rate": 0.00027557497173937923, |
|
"loss": 0.0001, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.4394029850746269, |
|
"grad_norm": 0.0018665606621652842, |
|
"learning_rate": 0.000274201324672203, |
|
"loss": 0.0001, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.448955223880597, |
|
"grad_norm": 0.001674832310527563, |
|
"learning_rate": 0.00027279372220300385, |
|
"loss": 0.0001, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.45850746268656717, |
|
"grad_norm": 0.0015318113146349788, |
|
"learning_rate": 0.0002713525491562421, |
|
"loss": 0.0001, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.46805970149253734, |
|
"grad_norm": 0.001571908826008439, |
|
"learning_rate": 0.00026987819953423867, |
|
"loss": 0.0001, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.47761194029850745, |
|
"grad_norm": 0.0012952347751706839, |
|
"learning_rate": 0.00026837107640945905, |
|
"loss": 0.0001, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.47761194029850745, |
|
"eval_loss": 0.23288998007774353, |
|
"eval_runtime": 7.3594, |
|
"eval_samples_per_second": 6.794, |
|
"eval_steps_per_second": 6.794, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.4871641791044776, |
|
"grad_norm": 0.0012007909826934338, |
|
"learning_rate": 0.0002668315918143169, |
|
"loss": 0.0, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.49671641791044774, |
|
"grad_norm": 0.0011592863593250513, |
|
"learning_rate": 0.00026526016662852886, |
|
"loss": 0.0, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.506268656716418, |
|
"grad_norm": 46.191253662109375, |
|
"learning_rate": 0.00026365723046405023, |
|
"loss": 3.1583, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.5158208955223881, |
|
"grad_norm": 74.40240478515625, |
|
"learning_rate": 0.0002620232215476231, |
|
"loss": 0.9258, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.5253731343283582, |
|
"grad_norm": 0.0414118617773056, |
|
"learning_rate": 0.0002603585866009697, |
|
"loss": 0.0002, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.5349253731343283, |
|
"grad_norm": 0.8846156001091003, |
|
"learning_rate": 0.00025866378071866334, |
|
"loss": 0.0021, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.5444776119402985, |
|
"grad_norm": 0.007200995460152626, |
|
"learning_rate": 0.00025693926724370956, |
|
"loss": 0.0002, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.5540298507462686, |
|
"grad_norm": 22.75172233581543, |
|
"learning_rate": 0.00025518551764087326, |
|
"loss": 0.0938, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.5635820895522388, |
|
"grad_norm": 0.020017998293042183, |
|
"learning_rate": 0.00025340301136778483, |
|
"loss": 0.0004, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.573134328358209, |
|
"grad_norm": 0.039760664105415344, |
|
"learning_rate": 0.00025159223574386114, |
|
"loss": 0.0006, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.5826865671641791, |
|
"grad_norm": 0.019533224403858185, |
|
"learning_rate": 0.0002497536858170772, |
|
"loss": 0.0004, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.5922388059701492, |
|
"grad_norm": 0.03347967565059662, |
|
"learning_rate": 0.00024788786422862526, |
|
"loss": 0.0005, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.6017910447761194, |
|
"grad_norm": 0.02993916906416416, |
|
"learning_rate": 0.00024599528107549745, |
|
"loss": 0.0005, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.6113432835820896, |
|
"grad_norm": 0.013049600645899773, |
|
"learning_rate": 0.00024407645377103054, |
|
"loss": 0.0003, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.6208955223880597, |
|
"grad_norm": 0.008280406706035137, |
|
"learning_rate": 0.00024213190690345018, |
|
"loss": 0.0002, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.6304477611940299, |
|
"grad_norm": 0.006376465316861868, |
|
"learning_rate": 0.00024016217209245374, |
|
"loss": 0.0002, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.0055150194093585014, |
|
"learning_rate": 0.00023816778784387094, |
|
"loss": 0.0002, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.6495522388059701, |
|
"grad_norm": 0.004492188338190317, |
|
"learning_rate": 0.0002361492994024415, |
|
"loss": 0.0001, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.6591044776119404, |
|
"grad_norm": 0.0042125750333070755, |
|
"learning_rate": 0.0002341072586027509, |
|
"loss": 0.0001, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.6686567164179105, |
|
"grad_norm": 0.003604372963309288, |
|
"learning_rate": 0.00023204222371836405, |
|
"loss": 0.0001, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.6782089552238806, |
|
"grad_norm": 0.0029829898849129677, |
|
"learning_rate": 0.00022995475930919905, |
|
"loss": 0.0001, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.6877611940298507, |
|
"grad_norm": 0.0027445245068520308, |
|
"learning_rate": 0.00022784543606718227, |
|
"loss": 0.0001, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.6973134328358209, |
|
"grad_norm": 0.0025820331647992134, |
|
"learning_rate": 0.00022571483066022657, |
|
"loss": 0.0001, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.706865671641791, |
|
"grad_norm": 0.0014186253538355231, |
|
"learning_rate": 0.0002235635255745762, |
|
"loss": 0.0, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.7164179104477612, |
|
"grad_norm": 0.0013180454261600971, |
|
"learning_rate": 0.00022139210895556104, |
|
"loss": 0.0, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.7164179104477612, |
|
"eval_loss": 0.0724787563085556, |
|
"eval_runtime": 7.3644, |
|
"eval_samples_per_second": 6.789, |
|
"eval_steps_per_second": 6.789, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.7259701492537314, |
|
"grad_norm": 0.0012341466499492526, |
|
"learning_rate": 0.00021920117444680317, |
|
"loss": 0.0, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.7355223880597015, |
|
"grad_norm": 0.0008805791730992496, |
|
"learning_rate": 0.00021699132102792097, |
|
"loss": 0.0, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.7450746268656716, |
|
"grad_norm": 0.0008222783799283206, |
|
"learning_rate": 0.0002147631528507739, |
|
"loss": 0.0, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.7546268656716418, |
|
"grad_norm": 31.767019271850586, |
|
"learning_rate": 0.00021251727907429355, |
|
"loss": 2.0511, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.764179104477612, |
|
"grad_norm": 3.381103754043579, |
|
"learning_rate": 0.0002102543136979454, |
|
"loss": 0.1576, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.7737313432835821, |
|
"grad_norm": 0.0009540443425066769, |
|
"learning_rate": 0.0002079748753938678, |
|
"loss": 0.0, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.7832835820895523, |
|
"grad_norm": 0.001079144305549562, |
|
"learning_rate": 0.0002056795873377331, |
|
"loss": 0.0, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.7928358208955224, |
|
"grad_norm": 0.0013730615610256791, |
|
"learning_rate": 0.00020336907703837748, |
|
"loss": 0.0001, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.8023880597014925, |
|
"grad_norm": 0.001953846774995327, |
|
"learning_rate": 0.00020104397616624645, |
|
"loss": 0.0001, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.8119402985074626, |
|
"grad_norm": 0.012235262431204319, |
|
"learning_rate": 0.00019870492038070252, |
|
"loss": 0.0001, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.8214925373134329, |
|
"grad_norm": 0.017554378136992455, |
|
"learning_rate": 0.0001963525491562421, |
|
"loss": 0.0002, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.831044776119403, |
|
"grad_norm": 0.025415591895580292, |
|
"learning_rate": 0.0001939875056076697, |
|
"loss": 0.0002, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.8405970149253731, |
|
"grad_norm": 0.010554085485637188, |
|
"learning_rate": 0.00019161043631427666, |
|
"loss": 0.0001, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.8501492537313433, |
|
"grad_norm": 0.012406528927385807, |
|
"learning_rate": 0.00018922199114307294, |
|
"loss": 0.0001, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.8597014925373134, |
|
"grad_norm": 0.008424860425293446, |
|
"learning_rate": 0.00018682282307111987, |
|
"loss": 0.0001, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.8692537313432835, |
|
"grad_norm": 0.006642926950007677, |
|
"learning_rate": 0.00018441358800701273, |
|
"loss": 0.0001, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.8788059701492538, |
|
"grad_norm": 0.003924868535250425, |
|
"learning_rate": 0.00018199494461156203, |
|
"loss": 0.0001, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.8883582089552239, |
|
"grad_norm": 0.0025878618471324444, |
|
"learning_rate": 0.000179567554117722, |
|
"loss": 0.0001, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.897910447761194, |
|
"grad_norm": 0.002073967596516013, |
|
"learning_rate": 0.00017713208014981648, |
|
"loss": 0.0001, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.9074626865671642, |
|
"grad_norm": 0.0016776231350377202, |
|
"learning_rate": 0.00017468918854211007, |
|
"loss": 0.0001, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.9170149253731343, |
|
"grad_norm": 0.0012397398240864277, |
|
"learning_rate": 0.00017223954715677627, |
|
"loss": 0.0, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.9265671641791045, |
|
"grad_norm": 0.0012315617641434073, |
|
"learning_rate": 0.00016978382570131034, |
|
"loss": 0.0, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.9361194029850747, |
|
"grad_norm": 0.0010754205286502838, |
|
"learning_rate": 0.00016732269554543794, |
|
"loss": 0.0, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.9456716417910448, |
|
"grad_norm": 0.001088542165234685, |
|
"learning_rate": 0.00016485682953756942, |
|
"loss": 0.0, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.9552238805970149, |
|
"grad_norm": 0.000979114673100412, |
|
"learning_rate": 0.00016238690182084986, |
|
"loss": 0.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.9552238805970149, |
|
"eval_loss": 0.09308185428380966, |
|
"eval_runtime": 7.3775, |
|
"eval_samples_per_second": 6.777, |
|
"eval_steps_per_second": 6.777, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.964776119402985, |
|
"grad_norm": 0.0009638951742090285, |
|
"learning_rate": 0.0001599135876488549, |
|
"loss": 0.0, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.9743283582089552, |
|
"grad_norm": 0.0008584060706198215, |
|
"learning_rate": 0.00015743756320098332, |
|
"loss": 0.0, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.9838805970149254, |
|
"grad_norm": 0.0009191096178255975, |
|
"learning_rate": 0.0001549595053975962, |
|
"loss": 0.0, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.9934328358208955, |
|
"grad_norm": 0.0008119167177937925, |
|
"learning_rate": 0.00015248009171495378, |
|
"loss": 0.0, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 1.0029850746268656, |
|
"grad_norm": 7.888625144958496, |
|
"learning_rate": 0.00015, |
|
"loss": 0.6371, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.012537313432836, |
|
"grad_norm": 11.907392501831055, |
|
"learning_rate": 0.00014751990828504622, |
|
"loss": 1.7248, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 1.022089552238806, |
|
"grad_norm": 0.002705940045416355, |
|
"learning_rate": 0.00014504049460240375, |
|
"loss": 0.0, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 1.0316417910447762, |
|
"grad_norm": 0.0012168614193797112, |
|
"learning_rate": 0.00014256243679901663, |
|
"loss": 0.0, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 1.0411940298507463, |
|
"grad_norm": 0.0009762799018062651, |
|
"learning_rate": 0.00014008641235114508, |
|
"loss": 0.0, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 1.0507462686567164, |
|
"grad_norm": 0.0009869365021586418, |
|
"learning_rate": 0.00013761309817915014, |
|
"loss": 0.0, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.0602985074626865, |
|
"grad_norm": 0.0013177823275327682, |
|
"learning_rate": 0.00013514317046243058, |
|
"loss": 0.0, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 1.0698507462686566, |
|
"grad_norm": 0.0011474322527647018, |
|
"learning_rate": 0.00013267730445456208, |
|
"loss": 0.0, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 1.079402985074627, |
|
"grad_norm": 0.0015544156776741147, |
|
"learning_rate": 0.00013021617429868963, |
|
"loss": 0.0001, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 1.088955223880597, |
|
"grad_norm": 0.002088154200464487, |
|
"learning_rate": 0.00012776045284322368, |
|
"loss": 0.0001, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 1.0985074626865672, |
|
"grad_norm": 0.002132783178240061, |
|
"learning_rate": 0.00012531081145788987, |
|
"loss": 0.0001, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.1080597014925373, |
|
"grad_norm": 0.010075758211314678, |
|
"learning_rate": 0.00012286791985018355, |
|
"loss": 0.0001, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 1.1176119402985074, |
|
"grad_norm": 0.004856719635426998, |
|
"learning_rate": 0.00012043244588227796, |
|
"loss": 0.0001, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 1.1271641791044775, |
|
"grad_norm": 0.004902370739728212, |
|
"learning_rate": 0.00011800505538843798, |
|
"loss": 0.0001, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 1.1367164179104479, |
|
"grad_norm": 0.0026889245491474867, |
|
"learning_rate": 0.00011558641199298727, |
|
"loss": 0.0001, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 1.146268656716418, |
|
"grad_norm": 0.002710577566176653, |
|
"learning_rate": 0.00011317717692888012, |
|
"loss": 0.0001, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.155820895522388, |
|
"grad_norm": 0.002412110799923539, |
|
"learning_rate": 0.00011077800885692702, |
|
"loss": 0.0001, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 1.1653731343283582, |
|
"grad_norm": 0.0020358674228191376, |
|
"learning_rate": 0.00010838956368572334, |
|
"loss": 0.0001, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.1749253731343283, |
|
"grad_norm": 0.0016259319381788373, |
|
"learning_rate": 0.0001060124943923303, |
|
"loss": 0.0001, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 1.1844776119402984, |
|
"grad_norm": 0.0015004443703219295, |
|
"learning_rate": 0.0001036474508437579, |
|
"loss": 0.0001, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 1.1940298507462686, |
|
"grad_norm": 0.0015228495467454195, |
|
"learning_rate": 0.00010129507961929748, |
|
"loss": 0.0001, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.1940298507462686, |
|
"eval_loss": 0.08212555944919586, |
|
"eval_runtime": 7.3806, |
|
"eval_samples_per_second": 6.775, |
|
"eval_steps_per_second": 6.775, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.2035820895522389, |
|
"grad_norm": 0.0013782400637865067, |
|
"learning_rate": 9.895602383375353e-05, |
|
"loss": 0.0001, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 1.213134328358209, |
|
"grad_norm": 0.0017314350698143244, |
|
"learning_rate": 9.663092296162251e-05, |
|
"loss": 0.0001, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 1.2226865671641791, |
|
"grad_norm": 0.0015217890031635761, |
|
"learning_rate": 9.432041266226686e-05, |
|
"loss": 0.0001, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 1.2322388059701492, |
|
"grad_norm": 0.0013981597730889916, |
|
"learning_rate": 9.202512460613219e-05, |
|
"loss": 0.0001, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 1.2417910447761193, |
|
"grad_norm": 0.0011965703452005982, |
|
"learning_rate": 8.97456863020546e-05, |
|
"loss": 0.0, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.2513432835820897, |
|
"grad_norm": 6.166757583618164, |
|
"learning_rate": 8.748272092570646e-05, |
|
"loss": 0.5203, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 1.2608955223880596, |
|
"grad_norm": 7.15877103805542, |
|
"learning_rate": 8.523684714922608e-05, |
|
"loss": 0.7429, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 1.27044776119403, |
|
"grad_norm": 0.0012916004052385688, |
|
"learning_rate": 8.300867897207903e-05, |
|
"loss": 0.0, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 0.0012297456851229072, |
|
"learning_rate": 8.079882555319684e-05, |
|
"loss": 0.0, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 1.2895522388059701, |
|
"grad_norm": 0.002031672513112426, |
|
"learning_rate": 7.860789104443896e-05, |
|
"loss": 0.0001, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 1.2991044776119403, |
|
"grad_norm": 0.01505509577691555, |
|
"learning_rate": 7.643647442542382e-05, |
|
"loss": 0.0001, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 1.3086567164179104, |
|
"grad_norm": 0.005438692402094603, |
|
"learning_rate": 7.428516933977347e-05, |
|
"loss": 0.0001, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 1.3182089552238807, |
|
"grad_norm": 0.008575469255447388, |
|
"learning_rate": 7.215456393281776e-05, |
|
"loss": 0.0001, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 1.3277611940298508, |
|
"grad_norm": 0.025798721238970757, |
|
"learning_rate": 7.004524069080096e-05, |
|
"loss": 0.0002, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 1.337313432835821, |
|
"grad_norm": 0.014630038291215897, |
|
"learning_rate": 6.795777628163599e-05, |
|
"loss": 0.0001, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.346865671641791, |
|
"grad_norm": 0.01390786375850439, |
|
"learning_rate": 6.58927413972491e-05, |
|
"loss": 0.0001, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 1.3564179104477612, |
|
"grad_norm": 0.003279446391388774, |
|
"learning_rate": 6.385070059755846e-05, |
|
"loss": 0.0001, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 1.3659701492537313, |
|
"grad_norm": 0.0030151065438985825, |
|
"learning_rate": 6.183221215612904e-05, |
|
"loss": 0.0001, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 1.3755223880597014, |
|
"grad_norm": 0.007617350667715073, |
|
"learning_rate": 5.983782790754623e-05, |
|
"loss": 0.0001, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 1.3850746268656717, |
|
"grad_norm": 0.0024264988023787737, |
|
"learning_rate": 5.786809309654982e-05, |
|
"loss": 0.0, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 1.3946268656716418, |
|
"grad_norm": 0.004660547710955143, |
|
"learning_rate": 5.592354622896944e-05, |
|
"loss": 0.0001, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 1.404179104477612, |
|
"grad_norm": 0.0025053154677152634, |
|
"learning_rate": 5.40047189245025e-05, |
|
"loss": 0.0, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 1.413731343283582, |
|
"grad_norm": 0.0053937798365950584, |
|
"learning_rate": 5.211213577137469e-05, |
|
"loss": 0.0001, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 1.4232835820895522, |
|
"grad_norm": 0.001341644674539566, |
|
"learning_rate": 5.024631418292274e-05, |
|
"loss": 0.0, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 1.4328358208955223, |
|
"grad_norm": 0.0014711751136928797, |
|
"learning_rate": 4.840776425613886e-05, |
|
"loss": 0.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.4328358208955223, |
|
"eval_loss": 0.06158732250332832, |
|
"eval_runtime": 7.3739, |
|
"eval_samples_per_second": 6.781, |
|
"eval_steps_per_second": 6.781, |
|
"step": 150 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 1, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.364806786973696e+17, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|