|
{ |
|
"best_metric": 0.7496414184570312, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-150", |
|
"epoch": 0.6065200909780136, |
|
"eval_steps": 25, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.003032600454890068, |
|
"grad_norm": 0.5732214450836182, |
|
"learning_rate": 2.9999999999999997e-05, |
|
"loss": 1.2366, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.003032600454890068, |
|
"eval_loss": 2.233814001083374, |
|
"eval_runtime": 2.1263, |
|
"eval_samples_per_second": 23.515, |
|
"eval_steps_per_second": 3.292, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.006065200909780136, |
|
"grad_norm": 0.5826615691184998, |
|
"learning_rate": 5.9999999999999995e-05, |
|
"loss": 1.2069, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.009097801364670205, |
|
"grad_norm": 0.8060511946678162, |
|
"learning_rate": 8.999999999999999e-05, |
|
"loss": 1.4296, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.012130401819560273, |
|
"grad_norm": 0.7838976979255676, |
|
"learning_rate": 0.00011999999999999999, |
|
"loss": 1.8031, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.015163002274450341, |
|
"grad_norm": 0.9152936935424805, |
|
"learning_rate": 0.00015, |
|
"loss": 2.8825, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01819560272934041, |
|
"grad_norm": 0.9083536863327026, |
|
"learning_rate": 0.00017999999999999998, |
|
"loss": 2.8714, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.02122820318423048, |
|
"grad_norm": 0.7592295408248901, |
|
"learning_rate": 0.00020999999999999998, |
|
"loss": 2.6227, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.024260803639120546, |
|
"grad_norm": 0.8848680257797241, |
|
"learning_rate": 0.00023999999999999998, |
|
"loss": 2.3879, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.027293404094010616, |
|
"grad_norm": 1.1162092685699463, |
|
"learning_rate": 0.00027, |
|
"loss": 1.9078, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.030326004548900682, |
|
"grad_norm": 1.1469495296478271, |
|
"learning_rate": 0.0003, |
|
"loss": 2.3304, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03335860500379075, |
|
"grad_norm": 1.7421014308929443, |
|
"learning_rate": 0.0002999794957488703, |
|
"loss": 2.6528, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.03639120545868082, |
|
"grad_norm": 2.160493850708008, |
|
"learning_rate": 0.0002999179886011389, |
|
"loss": 1.7365, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.039423805913570885, |
|
"grad_norm": 1.6525168418884277, |
|
"learning_rate": 0.0002998154953722457, |
|
"loss": 1.3977, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.04245640636846096, |
|
"grad_norm": 1.732264757156372, |
|
"learning_rate": 0.00029967204408281613, |
|
"loss": 0.9095, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.045489006823351025, |
|
"grad_norm": 1.9203606843948364, |
|
"learning_rate": 0.00029948767395100045, |
|
"loss": 0.8081, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04852160727824109, |
|
"grad_norm": 1.9768295288085938, |
|
"learning_rate": 0.0002992624353817517, |
|
"loss": 0.5633, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.05155420773313116, |
|
"grad_norm": 1.7694615125656128, |
|
"learning_rate": 0.0002989963899530457, |
|
"loss": 0.8582, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.05458680818802123, |
|
"grad_norm": 5.227871417999268, |
|
"learning_rate": 0.00029868961039904624, |
|
"loss": 2.0478, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0576194086429113, |
|
"grad_norm": 5.613846302032471, |
|
"learning_rate": 0.00029834218059022024, |
|
"loss": 2.628, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.060652009097801364, |
|
"grad_norm": 3.1168196201324463, |
|
"learning_rate": 0.00029795419551040833, |
|
"loss": 2.2502, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06368460955269144, |
|
"grad_norm": 2.028256893157959, |
|
"learning_rate": 0.00029752576123085736, |
|
"loss": 1.9875, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.0667172100075815, |
|
"grad_norm": 2.2501745223999023, |
|
"learning_rate": 0.0002970569948812214, |
|
"loss": 1.3561, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.06974981046247157, |
|
"grad_norm": 2.091094970703125, |
|
"learning_rate": 0.0002965480246175399, |
|
"loss": 1.4375, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.07278241091736164, |
|
"grad_norm": 1.8560627698898315, |
|
"learning_rate": 0.0002959989895872009, |
|
"loss": 0.8881, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.0758150113722517, |
|
"grad_norm": 1.5652915239334106, |
|
"learning_rate": 0.0002954100398908995, |
|
"loss": 1.1072, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.0758150113722517, |
|
"eval_loss": 1.6969295740127563, |
|
"eval_runtime": 1.6362, |
|
"eval_samples_per_second": 30.559, |
|
"eval_steps_per_second": 4.278, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.07884761182714177, |
|
"grad_norm": 2.593773126602173, |
|
"learning_rate": 0.0002947813365416023, |
|
"loss": 2.3669, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.08188021228203184, |
|
"grad_norm": 3.031888961791992, |
|
"learning_rate": 0.0002941130514205272, |
|
"loss": 2.1705, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.08491281273692192, |
|
"grad_norm": 5.483036994934082, |
|
"learning_rate": 0.0002934053672301536, |
|
"loss": 2.0639, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.08794541319181198, |
|
"grad_norm": 2.578169584274292, |
|
"learning_rate": 0.00029265847744427303, |
|
"loss": 2.2763, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.09097801364670205, |
|
"grad_norm": 2.708948850631714, |
|
"learning_rate": 0.00029187258625509513, |
|
"loss": 2.6701, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.09401061410159212, |
|
"grad_norm": 2.3376152515411377, |
|
"learning_rate": 0.00029104790851742417, |
|
"loss": 2.552, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.09704321455648218, |
|
"grad_norm": 4.5717453956604, |
|
"learning_rate": 0.0002901846696899191, |
|
"loss": 2.5146, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.10007581501137225, |
|
"grad_norm": 3.326918601989746, |
|
"learning_rate": 0.00028928310577345606, |
|
"loss": 2.7361, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.10310841546626232, |
|
"grad_norm": 3.0180697441101074, |
|
"learning_rate": 0.0002883434632466077, |
|
"loss": 2.1276, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.10614101592115238, |
|
"grad_norm": 3.3345470428466797, |
|
"learning_rate": 0.00028736599899825856, |
|
"loss": 3.5262, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.10917361637604246, |
|
"grad_norm": 4.028136730194092, |
|
"learning_rate": 0.00028635098025737434, |
|
"loss": 1.3238, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.11220621683093253, |
|
"grad_norm": 2.7964673042297363, |
|
"learning_rate": 0.00028529868451994384, |
|
"loss": 1.9645, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.1152388172858226, |
|
"grad_norm": 2.7283153533935547, |
|
"learning_rate": 0.0002842093994731145, |
|
"loss": 1.6322, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.11827141774071266, |
|
"grad_norm": 3.139758586883545, |
|
"learning_rate": 0.00028308342291654174, |
|
"loss": 1.3747, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.12130401819560273, |
|
"grad_norm": 3.401658535003662, |
|
"learning_rate": 0.00028192106268097334, |
|
"loss": 1.0433, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1243366186504928, |
|
"grad_norm": 3.2721481323242188, |
|
"learning_rate": 0.00028072263654409154, |
|
"loss": 0.9206, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.12736921910538287, |
|
"grad_norm": 2.2043514251708984, |
|
"learning_rate": 0.0002794884721436361, |
|
"loss": 0.6399, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.13040181956027294, |
|
"grad_norm": 2.3156049251556396, |
|
"learning_rate": 0.00027821890688783083, |
|
"loss": 0.8682, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.133434420015163, |
|
"grad_norm": 4.419061660766602, |
|
"learning_rate": 0.0002769142878631403, |
|
"loss": 0.5265, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.13646702047005307, |
|
"grad_norm": 2.9401931762695312, |
|
"learning_rate": 0.00027557497173937923, |
|
"loss": 0.4974, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.13949962092494314, |
|
"grad_norm": 2.4342000484466553, |
|
"learning_rate": 0.000274201324672203, |
|
"loss": 0.4075, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.1425322213798332, |
|
"grad_norm": 1.5229568481445312, |
|
"learning_rate": 0.00027279372220300385, |
|
"loss": 0.2443, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.14556482183472327, |
|
"grad_norm": 0.7514405846595764, |
|
"learning_rate": 0.0002713525491562421, |
|
"loss": 0.032, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.14859742228961334, |
|
"grad_norm": 3.046010732650757, |
|
"learning_rate": 0.00026987819953423867, |
|
"loss": 0.6208, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.1516300227445034, |
|
"grad_norm": 3.323383331298828, |
|
"learning_rate": 0.00026837107640945905, |
|
"loss": 0.4257, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.1516300227445034, |
|
"eval_loss": 2.1368014812469482, |
|
"eval_runtime": 1.6375, |
|
"eval_samples_per_second": 30.534, |
|
"eval_steps_per_second": 4.275, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.15466262319939347, |
|
"grad_norm": 4.283215522766113, |
|
"learning_rate": 0.0002668315918143169, |
|
"loss": 1.3323, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.15769522365428354, |
|
"grad_norm": 3.335487127304077, |
|
"learning_rate": 0.00026526016662852886, |
|
"loss": 1.1937, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.1607278241091736, |
|
"grad_norm": 2.471447706222534, |
|
"learning_rate": 0.00026365723046405023, |
|
"loss": 0.9932, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.16376042456406367, |
|
"grad_norm": 1.7774912118911743, |
|
"learning_rate": 0.0002620232215476231, |
|
"loss": 0.7889, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.16679302501895377, |
|
"grad_norm": 3.3943495750427246, |
|
"learning_rate": 0.0002603585866009697, |
|
"loss": 2.2703, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.16982562547384383, |
|
"grad_norm": 2.009979724884033, |
|
"learning_rate": 0.00025866378071866334, |
|
"loss": 2.3109, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.1728582259287339, |
|
"grad_norm": 2.5208301544189453, |
|
"learning_rate": 0.00025693926724370956, |
|
"loss": 2.1646, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.17589082638362397, |
|
"grad_norm": 1.7100300788879395, |
|
"learning_rate": 0.00025518551764087326, |
|
"loss": 1.8255, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.17892342683851403, |
|
"grad_norm": 1.4710556268692017, |
|
"learning_rate": 0.00025340301136778483, |
|
"loss": 1.0757, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.1819560272934041, |
|
"grad_norm": 1.435238003730774, |
|
"learning_rate": 0.00025159223574386114, |
|
"loss": 1.3513, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.18498862774829417, |
|
"grad_norm": 1.8190333843231201, |
|
"learning_rate": 0.0002497536858170772, |
|
"loss": 1.6192, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.18802122820318423, |
|
"grad_norm": 1.4935904741287231, |
|
"learning_rate": 0.00024788786422862526, |
|
"loss": 1.3938, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.1910538286580743, |
|
"grad_norm": 1.5081324577331543, |
|
"learning_rate": 0.00024599528107549745, |
|
"loss": 1.0642, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.19408642911296436, |
|
"grad_norm": 1.1953938007354736, |
|
"learning_rate": 0.00024407645377103054, |
|
"loss": 0.5319, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.19711902956785443, |
|
"grad_norm": 1.042183756828308, |
|
"learning_rate": 0.00024213190690345018, |
|
"loss": 0.2488, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.2001516300227445, |
|
"grad_norm": 0.7810710072517395, |
|
"learning_rate": 0.00024016217209245374, |
|
"loss": 0.2012, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.20318423047763456, |
|
"grad_norm": 0.7428088188171387, |
|
"learning_rate": 0.00023816778784387094, |
|
"loss": 0.2595, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.20621683093252463, |
|
"grad_norm": 1.495041012763977, |
|
"learning_rate": 0.0002361492994024415, |
|
"loss": 1.0473, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.2092494313874147, |
|
"grad_norm": 3.027667284011841, |
|
"learning_rate": 0.0002341072586027509, |
|
"loss": 1.2385, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.21228203184230476, |
|
"grad_norm": 2.402413845062256, |
|
"learning_rate": 0.00023204222371836405, |
|
"loss": 1.1585, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.21531463229719486, |
|
"grad_norm": 2.1353776454925537, |
|
"learning_rate": 0.00022995475930919905, |
|
"loss": 0.8443, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.21834723275208492, |
|
"grad_norm": 1.6501647233963013, |
|
"learning_rate": 0.00022784543606718227, |
|
"loss": 1.0337, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.221379833206975, |
|
"grad_norm": 4.340311527252197, |
|
"learning_rate": 0.00022571483066022657, |
|
"loss": 1.1674, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.22441243366186506, |
|
"grad_norm": 2.6187245845794678, |
|
"learning_rate": 0.0002235635255745762, |
|
"loss": 0.8261, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.22744503411675512, |
|
"grad_norm": 1.8873292207717896, |
|
"learning_rate": 0.00022139210895556104, |
|
"loss": 0.6276, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.22744503411675512, |
|
"eval_loss": 1.2340877056121826, |
|
"eval_runtime": 1.6504, |
|
"eval_samples_per_second": 30.296, |
|
"eval_steps_per_second": 4.242, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.2304776345716452, |
|
"grad_norm": 1.3795561790466309, |
|
"learning_rate": 0.00021920117444680317, |
|
"loss": 1.1671, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.23351023502653526, |
|
"grad_norm": 2.4479212760925293, |
|
"learning_rate": 0.00021699132102792097, |
|
"loss": 1.5214, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.23654283548142532, |
|
"grad_norm": 2.233268976211548, |
|
"learning_rate": 0.0002147631528507739, |
|
"loss": 1.3013, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.2395754359363154, |
|
"grad_norm": 1.663133144378662, |
|
"learning_rate": 0.00021251727907429355, |
|
"loss": 1.0497, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.24260803639120546, |
|
"grad_norm": 1.6820964813232422, |
|
"learning_rate": 0.0002102543136979454, |
|
"loss": 1.8455, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.24564063684609552, |
|
"grad_norm": 1.9858072996139526, |
|
"learning_rate": 0.0002079748753938678, |
|
"loss": 1.2171, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.2486732373009856, |
|
"grad_norm": 1.5699634552001953, |
|
"learning_rate": 0.0002056795873377331, |
|
"loss": 1.2286, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.2517058377558757, |
|
"grad_norm": 1.4695563316345215, |
|
"learning_rate": 0.00020336907703837748, |
|
"loss": 1.815, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.25473843821076575, |
|
"grad_norm": 1.7311599254608154, |
|
"learning_rate": 0.00020104397616624645, |
|
"loss": 2.3227, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.2577710386656558, |
|
"grad_norm": 1.2741672992706299, |
|
"learning_rate": 0.00019870492038070252, |
|
"loss": 1.8355, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.2608036391205459, |
|
"grad_norm": 1.5505032539367676, |
|
"learning_rate": 0.0001963525491562421, |
|
"loss": 1.4854, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.26383623957543595, |
|
"grad_norm": 1.86395263671875, |
|
"learning_rate": 0.0001939875056076697, |
|
"loss": 1.8457, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.266868840030326, |
|
"grad_norm": 1.2954498529434204, |
|
"learning_rate": 0.00019161043631427666, |
|
"loss": 2.1125, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.2699014404852161, |
|
"grad_norm": 1.0831388235092163, |
|
"learning_rate": 0.00018922199114307294, |
|
"loss": 1.1493, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.27293404094010615, |
|
"grad_norm": 1.4645804166793823, |
|
"learning_rate": 0.00018682282307111987, |
|
"loss": 1.0175, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.2759666413949962, |
|
"grad_norm": 1.5428311824798584, |
|
"learning_rate": 0.00018441358800701273, |
|
"loss": 0.9792, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.2789992418498863, |
|
"grad_norm": 1.3811968564987183, |
|
"learning_rate": 0.00018199494461156203, |
|
"loss": 0.4531, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.28203184230477635, |
|
"grad_norm": 1.3926177024841309, |
|
"learning_rate": 0.000179567554117722, |
|
"loss": 0.4052, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.2850644427596664, |
|
"grad_norm": 1.544171929359436, |
|
"learning_rate": 0.00017713208014981648, |
|
"loss": 0.5097, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.2880970432145565, |
|
"grad_norm": 1.181302547454834, |
|
"learning_rate": 0.00017468918854211007, |
|
"loss": 0.327, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.29112964366944655, |
|
"grad_norm": 1.7300562858581543, |
|
"learning_rate": 0.00017223954715677627, |
|
"loss": 0.3545, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.2941622441243366, |
|
"grad_norm": 1.378700852394104, |
|
"learning_rate": 0.00016978382570131034, |
|
"loss": 0.2552, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.2971948445792267, |
|
"grad_norm": 1.5631200075149536, |
|
"learning_rate": 0.00016732269554543794, |
|
"loss": 0.2897, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.30022744503411675, |
|
"grad_norm": 0.8782522678375244, |
|
"learning_rate": 0.00016485682953756942, |
|
"loss": 0.1431, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.3032600454890068, |
|
"grad_norm": 2.4966511726379395, |
|
"learning_rate": 0.00016238690182084986, |
|
"loss": 0.5109, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.3032600454890068, |
|
"eval_loss": 1.3453013896942139, |
|
"eval_runtime": 1.6367, |
|
"eval_samples_per_second": 30.55, |
|
"eval_steps_per_second": 4.277, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.3062926459438969, |
|
"grad_norm": 2.116553544998169, |
|
"learning_rate": 0.0001599135876488549, |
|
"loss": 0.7869, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.30932524639878695, |
|
"grad_norm": 1.7891075611114502, |
|
"learning_rate": 0.00015743756320098332, |
|
"loss": 0.6726, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.312357846853677, |
|
"grad_norm": 1.2334423065185547, |
|
"learning_rate": 0.0001549595053975962, |
|
"loss": 0.5829, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.3153904473085671, |
|
"grad_norm": 1.21792733669281, |
|
"learning_rate": 0.00015248009171495378, |
|
"loss": 0.4098, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.31842304776345715, |
|
"grad_norm": 4.549845218658447, |
|
"learning_rate": 0.00015, |
|
"loss": 1.1113, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.3214556482183472, |
|
"grad_norm": 2.6019155979156494, |
|
"learning_rate": 0.00014751990828504622, |
|
"loss": 2.0139, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.3244882486732373, |
|
"grad_norm": 2.857210159301758, |
|
"learning_rate": 0.00014504049460240375, |
|
"loss": 1.7404, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.32752084912812734, |
|
"grad_norm": 2.389359474182129, |
|
"learning_rate": 0.00014256243679901663, |
|
"loss": 1.1878, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.3305534495830174, |
|
"grad_norm": 2.0703272819519043, |
|
"learning_rate": 0.00014008641235114508, |
|
"loss": 0.7422, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.33358605003790753, |
|
"grad_norm": 2.1270902156829834, |
|
"learning_rate": 0.00013761309817915014, |
|
"loss": 0.8381, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.3366186504927976, |
|
"grad_norm": 2.573129415512085, |
|
"learning_rate": 0.00013514317046243058, |
|
"loss": 1.0021, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.33965125094768767, |
|
"grad_norm": 2.3638718128204346, |
|
"learning_rate": 0.00013267730445456208, |
|
"loss": 1.0738, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.34268385140257773, |
|
"grad_norm": 1.510916829109192, |
|
"learning_rate": 0.00013021617429868963, |
|
"loss": 0.7256, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.3457164518574678, |
|
"grad_norm": 1.4690295457839966, |
|
"learning_rate": 0.00012776045284322368, |
|
"loss": 0.3295, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.34874905231235787, |
|
"grad_norm": 0.834520161151886, |
|
"learning_rate": 0.00012531081145788987, |
|
"loss": 0.2155, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.35178165276724793, |
|
"grad_norm": 0.8141886591911316, |
|
"learning_rate": 0.00012286791985018355, |
|
"loss": 0.1817, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.354814253222138, |
|
"grad_norm": 1.3728559017181396, |
|
"learning_rate": 0.00012043244588227796, |
|
"loss": 0.8601, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.35784685367702807, |
|
"grad_norm": 2.7137482166290283, |
|
"learning_rate": 0.00011800505538843798, |
|
"loss": 0.7255, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.36087945413191813, |
|
"grad_norm": 2.6060287952423096, |
|
"learning_rate": 0.00011558641199298727, |
|
"loss": 0.4437, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.3639120545868082, |
|
"grad_norm": 1.514891505241394, |
|
"learning_rate": 0.00011317717692888012, |
|
"loss": 0.3726, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.36694465504169826, |
|
"grad_norm": 1.1686081886291504, |
|
"learning_rate": 0.00011077800885692702, |
|
"loss": 0.6103, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.36997725549658833, |
|
"grad_norm": 1.221090316772461, |
|
"learning_rate": 0.00010838956368572334, |
|
"loss": 0.8926, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.3730098559514784, |
|
"grad_norm": 1.323407530784607, |
|
"learning_rate": 0.0001060124943923303, |
|
"loss": 0.5818, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.37604245640636846, |
|
"grad_norm": 0.9954697489738464, |
|
"learning_rate": 0.0001036474508437579, |
|
"loss": 0.5115, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.37907505686125853, |
|
"grad_norm": 0.8847994804382324, |
|
"learning_rate": 0.00010129507961929748, |
|
"loss": 0.5309, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.37907505686125853, |
|
"eval_loss": 0.8118882775306702, |
|
"eval_runtime": 1.6485, |
|
"eval_samples_per_second": 30.332, |
|
"eval_steps_per_second": 4.246, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.3821076573161486, |
|
"grad_norm": 1.0781829357147217, |
|
"learning_rate": 9.895602383375353e-05, |
|
"loss": 1.2797, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.38514025777103866, |
|
"grad_norm": 1.266541838645935, |
|
"learning_rate": 9.663092296162251e-05, |
|
"loss": 1.5195, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.38817285822592873, |
|
"grad_norm": 1.4716166257858276, |
|
"learning_rate": 9.432041266226686e-05, |
|
"loss": 1.1823, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.3912054586808188, |
|
"grad_norm": 1.729562520980835, |
|
"learning_rate": 9.202512460613219e-05, |
|
"loss": 1.2575, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.39423805913570886, |
|
"grad_norm": 1.367742896080017, |
|
"learning_rate": 8.97456863020546e-05, |
|
"loss": 1.5417, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.39727065959059893, |
|
"grad_norm": 1.3168230056762695, |
|
"learning_rate": 8.748272092570646e-05, |
|
"loss": 1.6111, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.400303260045489, |
|
"grad_norm": 1.4090535640716553, |
|
"learning_rate": 8.523684714922608e-05, |
|
"loss": 1.9075, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.40333586050037906, |
|
"grad_norm": 1.2242377996444702, |
|
"learning_rate": 8.300867897207903e-05, |
|
"loss": 1.6223, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.40636846095526913, |
|
"grad_norm": 1.632868766784668, |
|
"learning_rate": 8.079882555319684e-05, |
|
"loss": 2.0089, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.4094010614101592, |
|
"grad_norm": 1.2393670082092285, |
|
"learning_rate": 7.860789104443896e-05, |
|
"loss": 1.9279, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.41243366186504926, |
|
"grad_norm": 1.6518961191177368, |
|
"learning_rate": 7.643647442542382e-05, |
|
"loss": 1.5933, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.41546626231993933, |
|
"grad_norm": 1.4009312391281128, |
|
"learning_rate": 7.428516933977347e-05, |
|
"loss": 2.0802, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.4184988627748294, |
|
"grad_norm": 1.5112993717193604, |
|
"learning_rate": 7.215456393281776e-05, |
|
"loss": 0.7962, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.42153146322971946, |
|
"grad_norm": 1.6284480094909668, |
|
"learning_rate": 7.004524069080096e-05, |
|
"loss": 1.1892, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.4245640636846095, |
|
"grad_norm": 1.3762167692184448, |
|
"learning_rate": 6.795777628163599e-05, |
|
"loss": 1.1679, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.4275966641394996, |
|
"grad_norm": 1.8651918172836304, |
|
"learning_rate": 6.58927413972491e-05, |
|
"loss": 0.6623, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.4306292645943897, |
|
"grad_norm": 2.2076892852783203, |
|
"learning_rate": 6.385070059755846e-05, |
|
"loss": 0.5948, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.4336618650492798, |
|
"grad_norm": 1.904805064201355, |
|
"learning_rate": 6.183221215612904e-05, |
|
"loss": 0.3524, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.43669446550416985, |
|
"grad_norm": 2.0237276554107666, |
|
"learning_rate": 5.983782790754623e-05, |
|
"loss": 0.3425, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.4397270659590599, |
|
"grad_norm": 1.636704444885254, |
|
"learning_rate": 5.786809309654982e-05, |
|
"loss": 0.4734, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.44275966641395, |
|
"grad_norm": 1.4179580211639404, |
|
"learning_rate": 5.592354622896944e-05, |
|
"loss": 0.2568, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.44579226686884005, |
|
"grad_norm": 1.702435851097107, |
|
"learning_rate": 5.40047189245025e-05, |
|
"loss": 0.2547, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.4488248673237301, |
|
"grad_norm": 1.4788601398468018, |
|
"learning_rate": 5.211213577137469e-05, |
|
"loss": 0.2914, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.4518574677786202, |
|
"grad_norm": 1.5061534643173218, |
|
"learning_rate": 5.024631418292274e-05, |
|
"loss": 0.1883, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.45489006823351025, |
|
"grad_norm": 1.25308358669281, |
|
"learning_rate": 4.840776425613886e-05, |
|
"loss": 0.1811, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.45489006823351025, |
|
"eval_loss": 0.7496414184570312, |
|
"eval_runtime": 1.6384, |
|
"eval_samples_per_second": 30.517, |
|
"eval_steps_per_second": 4.272, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.4579226686884003, |
|
"grad_norm": 1.176184058189392, |
|
"learning_rate": 4.659698863221513e-05, |
|
"loss": 0.3398, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.4609552691432904, |
|
"grad_norm": 1.151708960533142, |
|
"learning_rate": 4.481448235912671e-05, |
|
"loss": 0.3601, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.46398786959818045, |
|
"grad_norm": 0.974331259727478, |
|
"learning_rate": 4.306073275629044e-05, |
|
"loss": 0.2929, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.4670204700530705, |
|
"grad_norm": 0.9359554052352905, |
|
"learning_rate": 4.133621928133665e-05, |
|
"loss": 0.3445, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.4700530705079606, |
|
"grad_norm": 2.2175326347351074, |
|
"learning_rate": 3.964141339903026e-05, |
|
"loss": 1.4245, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.47308567096285065, |
|
"grad_norm": 1.6879606246948242, |
|
"learning_rate": 3.797677845237696e-05, |
|
"loss": 1.6504, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.4761182714177407, |
|
"grad_norm": 1.5661247968673706, |
|
"learning_rate": 3.634276953594982e-05, |
|
"loss": 1.2227, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.4791508718726308, |
|
"grad_norm": 1.6378949880599976, |
|
"learning_rate": 3.473983337147118e-05, |
|
"loss": 1.0772, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.48218347232752085, |
|
"grad_norm": 1.839331865310669, |
|
"learning_rate": 3.316840818568315e-05, |
|
"loss": 0.6356, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.4852160727824109, |
|
"grad_norm": 1.9953904151916504, |
|
"learning_rate": 3.162892359054098e-05, |
|
"loss": 0.6139, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.488248673237301, |
|
"grad_norm": 4.8791184425354, |
|
"learning_rate": 3.0121800465761293e-05, |
|
"loss": 0.7722, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.49128127369219105, |
|
"grad_norm": 2.2169203758239746, |
|
"learning_rate": 2.8647450843757897e-05, |
|
"loss": 0.757, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.4943138741470811, |
|
"grad_norm": 1.8224825859069824, |
|
"learning_rate": 2.7206277796996144e-05, |
|
"loss": 0.8179, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.4973464746019712, |
|
"grad_norm": 0.8757375478744507, |
|
"learning_rate": 2.5798675327796993e-05, |
|
"loss": 0.5202, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.5003790750568613, |
|
"grad_norm": 0.8012860417366028, |
|
"learning_rate": 2.4425028260620715e-05, |
|
"loss": 0.1801, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.5034116755117514, |
|
"grad_norm": 0.6913158297538757, |
|
"learning_rate": 2.3085712136859668e-05, |
|
"loss": 0.2741, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.5064442759666414, |
|
"grad_norm": 0.6833317279815674, |
|
"learning_rate": 2.178109311216913e-05, |
|
"loss": 0.2653, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.5094768764215315, |
|
"grad_norm": 0.9797353744506836, |
|
"learning_rate": 2.0511527856363912e-05, |
|
"loss": 0.6669, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.5125094768764216, |
|
"grad_norm": 2.5909066200256348, |
|
"learning_rate": 1.927736345590839e-05, |
|
"loss": 0.2745, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.5155420773313116, |
|
"grad_norm": 2.055574655532837, |
|
"learning_rate": 1.8078937319026654e-05, |
|
"loss": 0.336, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.5185746777862017, |
|
"grad_norm": 1.7324495315551758, |
|
"learning_rate": 1.6916577083458228e-05, |
|
"loss": 0.2687, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.5216072782410918, |
|
"grad_norm": 0.9579265117645264, |
|
"learning_rate": 1.579060052688548e-05, |
|
"loss": 0.8671, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.5246398786959818, |
|
"grad_norm": 0.8632516860961914, |
|
"learning_rate": 1.4701315480056164e-05, |
|
"loss": 0.5554, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.5276724791508719, |
|
"grad_norm": 0.8002261519432068, |
|
"learning_rate": 1.3649019742625623e-05, |
|
"loss": 0.5221, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.530705079605762, |
|
"grad_norm": 0.9454600811004639, |
|
"learning_rate": 1.2634001001741373e-05, |
|
"loss": 0.7456, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.530705079605762, |
|
"eval_loss": 0.7249814867973328, |
|
"eval_runtime": 1.6332, |
|
"eval_samples_per_second": 30.615, |
|
"eval_steps_per_second": 4.286, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.533737680060652, |
|
"grad_norm": 0.9941129088401794, |
|
"learning_rate": 1.1656536753392287e-05, |
|
"loss": 1.0431, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.5367702805155421, |
|
"grad_norm": 1.2236559391021729, |
|
"learning_rate": 1.0716894226543953e-05, |
|
"loss": 1.7665, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.5398028809704322, |
|
"grad_norm": 1.2112020254135132, |
|
"learning_rate": 9.815330310080887e-06, |
|
"loss": 1.407, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.5428354814253222, |
|
"grad_norm": 0.9938328266143799, |
|
"learning_rate": 8.952091482575824e-06, |
|
"loss": 1.0979, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.5458680818802123, |
|
"grad_norm": 1.114398717880249, |
|
"learning_rate": 8.127413744904804e-06, |
|
"loss": 1.1426, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.5489006823351024, |
|
"grad_norm": 1.4169811010360718, |
|
"learning_rate": 7.34152255572697e-06, |
|
"loss": 1.6449, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.5519332827899924, |
|
"grad_norm": 1.2509397268295288, |
|
"learning_rate": 6.594632769846353e-06, |
|
"loss": 1.3782, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.5549658832448825, |
|
"grad_norm": 1.321242094039917, |
|
"learning_rate": 5.886948579472778e-06, |
|
"loss": 1.46, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.5579984836997726, |
|
"grad_norm": 1.6065739393234253, |
|
"learning_rate": 5.218663458397715e-06, |
|
"loss": 2.0705, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.5610310841546626, |
|
"grad_norm": 1.637670874595642, |
|
"learning_rate": 4.589960109100444e-06, |
|
"loss": 1.8571, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.5640636846095527, |
|
"grad_norm": 1.617815613746643, |
|
"learning_rate": 4.001010412799138e-06, |
|
"loss": 2.0011, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.5670962850644428, |
|
"grad_norm": 1.380850911140442, |
|
"learning_rate": 3.451975382460109e-06, |
|
"loss": 1.7508, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.5701288855193328, |
|
"grad_norm": 1.7648086547851562, |
|
"learning_rate": 2.9430051187785962e-06, |
|
"loss": 1.2211, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.5731614859742229, |
|
"grad_norm": 1.3612362146377563, |
|
"learning_rate": 2.4742387691426445e-06, |
|
"loss": 1.0974, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.576194086429113, |
|
"grad_norm": 1.6548246145248413, |
|
"learning_rate": 2.0458044895916513e-06, |
|
"loss": 0.7674, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.579226686884003, |
|
"grad_norm": 1.6227363348007202, |
|
"learning_rate": 1.6578194097797258e-06, |
|
"loss": 0.4966, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.5822592873388931, |
|
"grad_norm": 2.1612672805786133, |
|
"learning_rate": 1.3103896009537207e-06, |
|
"loss": 0.5805, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.5852918877937832, |
|
"grad_norm": 1.482163667678833, |
|
"learning_rate": 1.0036100469542786e-06, |
|
"loss": 0.5068, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.5883244882486732, |
|
"grad_norm": 1.0745387077331543, |
|
"learning_rate": 7.375646182482875e-07, |
|
"loss": 0.2758, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.5913570887035633, |
|
"grad_norm": 1.2835489511489868, |
|
"learning_rate": 5.123260489995229e-07, |
|
"loss": 0.2952, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.5943896891584534, |
|
"grad_norm": 1.2066874504089355, |
|
"learning_rate": 3.2795591718381975e-07, |
|
"loss": 0.2233, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.5974222896133434, |
|
"grad_norm": 0.9106021523475647, |
|
"learning_rate": 1.8450462775428942e-07, |
|
"loss": 0.2002, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.6004548900682335, |
|
"grad_norm": 1.0497798919677734, |
|
"learning_rate": 8.201139886109264e-08, |
|
"loss": 0.1902, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.6034874905231236, |
|
"grad_norm": 0.9898223876953125, |
|
"learning_rate": 2.0504251129649374e-08, |
|
"loss": 0.157, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.6065200909780136, |
|
"grad_norm": 0.5523911118507385, |
|
"learning_rate": 0.0, |
|
"loss": 0.0983, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.6065200909780136, |
|
"eval_loss": 0.7618037462234497, |
|
"eval_runtime": 1.6354, |
|
"eval_samples_per_second": 30.573, |
|
"eval_steps_per_second": 4.28, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 1, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 1 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.128243999100109e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|