|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9012658227848102, |
|
"eval_steps": 500, |
|
"global_step": 801, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 0.33328112959861755, |
|
"learning_rate": 2e-05, |
|
"loss": 0.9029, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 0.341234415769577, |
|
"learning_rate": 4e-05, |
|
"loss": 0.9276, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 0.3182106614112854, |
|
"learning_rate": 6e-05, |
|
"loss": 0.9148, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 0.22865141928195953, |
|
"learning_rate": 8e-05, |
|
"loss": 0.8421, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.29670122265815735, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7984, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.28762516379356384, |
|
"learning_rate": 0.00012, |
|
"loss": 0.7625, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.23907965421676636, |
|
"learning_rate": 0.00014, |
|
"loss": 0.7226, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.19532263278961182, |
|
"learning_rate": 0.00016, |
|
"loss": 0.7043, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.1425202488899231, |
|
"learning_rate": 0.00018, |
|
"loss": 0.6784, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.10882167518138885, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6579, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.11275648325681686, |
|
"learning_rate": 0.00019999935985220405, |
|
"loss": 0.6592, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.10155748575925827, |
|
"learning_rate": 0.00019999743941701188, |
|
"loss": 0.6554, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.0845816433429718, |
|
"learning_rate": 0.0001999942387190108, |
|
"loss": 0.6513, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.09046202898025513, |
|
"learning_rate": 0.0001999897577991792, |
|
"loss": 0.6267, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.08296829462051392, |
|
"learning_rate": 0.00019998399671488612, |
|
"loss": 0.6434, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.07594181597232819, |
|
"learning_rate": 0.00019997695553989042, |
|
"loss": 0.6096, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.0683172270655632, |
|
"learning_rate": 0.00019996863436433997, |
|
"loss": 0.6143, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.05627186596393585, |
|
"learning_rate": 0.0001999590332947704, |
|
"loss": 0.6024, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.05644279345870018, |
|
"learning_rate": 0.00019994815245410384, |
|
"loss": 0.595, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.05661479011178017, |
|
"learning_rate": 0.00019993599198164715, |
|
"loss": 0.5759, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.04077177122235298, |
|
"learning_rate": 0.00019992255203309033, |
|
"loss": 0.582, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.04510512948036194, |
|
"learning_rate": 0.00019990783278050448, |
|
"loss": 0.5751, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.0470162108540535, |
|
"learning_rate": 0.00019989183441233952, |
|
"loss": 0.5716, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.04402562975883484, |
|
"learning_rate": 0.00019987455713342187, |
|
"loss": 0.564, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.045594893395900726, |
|
"learning_rate": 0.00019985600116495173, |
|
"loss": 0.5657, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.037670280784368515, |
|
"learning_rate": 0.0001998361667445004, |
|
"loss": 0.5619, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.034366946667432785, |
|
"learning_rate": 0.00019981505412600706, |
|
"loss": 0.554, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.044084370136260986, |
|
"learning_rate": 0.00019979266357977564, |
|
"loss": 0.5527, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.04839107394218445, |
|
"learning_rate": 0.0001997689953924713, |
|
"loss": 0.5534, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.04943666234612465, |
|
"learning_rate": 0.0001997440498671168, |
|
"loss": 0.5357, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.05644814297556877, |
|
"learning_rate": 0.00019971782732308867, |
|
"loss": 0.5388, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.05796538665890694, |
|
"learning_rate": 0.00019969032809611287, |
|
"loss": 0.5327, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.05399211496114731, |
|
"learning_rate": 0.0001996615525382609, |
|
"loss": 0.5447, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.03574652597308159, |
|
"learning_rate": 0.0001996315010179449, |
|
"loss": 0.5213, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.04394914582371712, |
|
"learning_rate": 0.00019960017391991314, |
|
"loss": 0.5247, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.05197073519229889, |
|
"learning_rate": 0.00019956757164524516, |
|
"loss": 0.5253, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.05605654790997505, |
|
"learning_rate": 0.00019953369461134634, |
|
"loss": 0.5289, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.05167644843459129, |
|
"learning_rate": 0.00019949854325194294, |
|
"loss": 0.5223, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.04118728265166283, |
|
"learning_rate": 0.0001994621180170762, |
|
"loss": 0.52, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.0373263917863369, |
|
"learning_rate": 0.00019942441937309684, |
|
"loss": 0.5076, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.04870600998401642, |
|
"learning_rate": 0.0001993854478026589, |
|
"loss": 0.5287, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.05014103651046753, |
|
"learning_rate": 0.00019934520380471372, |
|
"loss": 0.5048, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.04807833209633827, |
|
"learning_rate": 0.0001993036878945034, |
|
"loss": 0.5075, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.040045421570539474, |
|
"learning_rate": 0.0001992609006035543, |
|
"loss": 0.4954, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.04202349856495857, |
|
"learning_rate": 0.00019921684247967028, |
|
"loss": 0.4953, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.041629109531641006, |
|
"learning_rate": 0.0001991715140869255, |
|
"loss": 0.501, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.04767894372344017, |
|
"learning_rate": 0.0001991249160056574, |
|
"loss": 0.4878, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.05395263060927391, |
|
"learning_rate": 0.00019907704883245916, |
|
"loss": 0.5014, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.06040235981345177, |
|
"learning_rate": 0.00019902791318017205, |
|
"loss": 0.5043, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.06749273091554642, |
|
"learning_rate": 0.0001989775096778777, |
|
"loss": 0.4931, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.06482889503240585, |
|
"learning_rate": 0.00019892583897088994, |
|
"loss": 0.4869, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.045358914881944656, |
|
"learning_rate": 0.0001988729017207465, |
|
"loss": 0.479, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.043360061943531036, |
|
"learning_rate": 0.00019881869860520073, |
|
"loss": 0.4953, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.060205183923244476, |
|
"learning_rate": 0.00019876323031821266, |
|
"loss": 0.4705, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.05729120969772339, |
|
"learning_rate": 0.00019870649756994037, |
|
"loss": 0.4887, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.03843148052692413, |
|
"learning_rate": 0.00019864850108673073, |
|
"loss": 0.4737, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.053673889487981796, |
|
"learning_rate": 0.00019858924161111015, |
|
"loss": 0.4817, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.05148368701338768, |
|
"learning_rate": 0.00019852871990177503, |
|
"loss": 0.4763, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.05371672287583351, |
|
"learning_rate": 0.00019846693673358226, |
|
"loss": 0.4751, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.05490916967391968, |
|
"learning_rate": 0.00019840389289753896, |
|
"loss": 0.457, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.04629400372505188, |
|
"learning_rate": 0.00019833958920079255, |
|
"loss": 0.4692, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.051137253642082214, |
|
"learning_rate": 0.00019827402646662047, |
|
"loss": 0.4614, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.051790811121463776, |
|
"learning_rate": 0.0001982072055344195, |
|
"loss": 0.4594, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.0445956289768219, |
|
"learning_rate": 0.00019813912725969509, |
|
"loss": 0.4601, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.04766576737165451, |
|
"learning_rate": 0.0001980697925140504, |
|
"loss": 0.4631, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.04839074984192848, |
|
"learning_rate": 0.0001979992021851751, |
|
"loss": 0.4605, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.04736727103590965, |
|
"learning_rate": 0.0001979273571768341, |
|
"loss": 0.4617, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.057293377816677094, |
|
"learning_rate": 0.0001978542584088558, |
|
"loss": 0.4621, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.05025665834546089, |
|
"learning_rate": 0.0001977799068171206, |
|
"loss": 0.4671, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.057366687804460526, |
|
"learning_rate": 0.0001977043033535486, |
|
"loss": 0.4521, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.07595837116241455, |
|
"learning_rate": 0.00019762744898608762, |
|
"loss": 0.4671, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.07574213296175003, |
|
"learning_rate": 0.0001975493446987007, |
|
"loss": 0.4664, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.06472938507795334, |
|
"learning_rate": 0.00019746999149135362, |
|
"loss": 0.456, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.05983012542128563, |
|
"learning_rate": 0.00019738939038000205, |
|
"loss": 0.4459, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.05136057734489441, |
|
"learning_rate": 0.00019730754239657842, |
|
"loss": 0.4486, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.06191498041152954, |
|
"learning_rate": 0.00019722444858897878, |
|
"loss": 0.4424, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.06742191314697266, |
|
"learning_rate": 0.0001971401100210496, |
|
"loss": 0.458, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.06019548326730728, |
|
"learning_rate": 0.00019705452777257377, |
|
"loss": 0.4423, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.05012982338666916, |
|
"learning_rate": 0.0001969677029392571, |
|
"loss": 0.4466, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.0552060566842556, |
|
"learning_rate": 0.00019687963663271409, |
|
"loss": 0.4534, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.05883748456835747, |
|
"learning_rate": 0.00019679032998045376, |
|
"loss": 0.4409, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.07146705687046051, |
|
"learning_rate": 0.00019669978412586528, |
|
"loss": 0.4582, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.054095230996608734, |
|
"learning_rate": 0.00019660800022820317, |
|
"loss": 0.4487, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.04927053675055504, |
|
"learning_rate": 0.00019651497946257266, |
|
"loss": 0.4429, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.06037526577711105, |
|
"learning_rate": 0.00019642072301991455, |
|
"loss": 0.4456, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.05555957555770874, |
|
"learning_rate": 0.00019632523210698987, |
|
"loss": 0.4382, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.04606284573674202, |
|
"learning_rate": 0.00019622850794636455, |
|
"loss": 0.4411, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.04605920985341072, |
|
"learning_rate": 0.00019613055177639384, |
|
"loss": 0.4326, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.050325632095336914, |
|
"learning_rate": 0.0001960313648512062, |
|
"loss": 0.4338, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.04921424016356468, |
|
"learning_rate": 0.00019593094844068748, |
|
"loss": 0.4316, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.04333706200122833, |
|
"learning_rate": 0.00019582930383046457, |
|
"loss": 0.4441, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.048454612493515015, |
|
"learning_rate": 0.0001957264323218889, |
|
"loss": 0.4382, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.0541059784591198, |
|
"learning_rate": 0.00019562233523201986, |
|
"loss": 0.4328, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.043696511536836624, |
|
"learning_rate": 0.00019551701389360795, |
|
"loss": 0.4335, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.04407835751771927, |
|
"learning_rate": 0.00019541046965507758, |
|
"loss": 0.4327, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.05477238819003105, |
|
"learning_rate": 0.00019530270388050998, |
|
"loss": 0.4294, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.05609311908483505, |
|
"learning_rate": 0.00019519371794962556, |
|
"loss": 0.4305, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.045145273208618164, |
|
"learning_rate": 0.00019508351325776642, |
|
"loss": 0.4395, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.04475285857915878, |
|
"learning_rate": 0.00019497209121587837, |
|
"loss": 0.4284, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.04405711591243744, |
|
"learning_rate": 0.00019485945325049288, |
|
"loss": 0.4214, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.04461454227566719, |
|
"learning_rate": 0.0001947456008037089, |
|
"loss": 0.4154, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.04791221395134926, |
|
"learning_rate": 0.00019463053533317425, |
|
"loss": 0.4248, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.05543987452983856, |
|
"learning_rate": 0.00019451425831206706, |
|
"loss": 0.4303, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.06330578774213791, |
|
"learning_rate": 0.00019439677122907697, |
|
"loss": 0.4274, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.05569112300872803, |
|
"learning_rate": 0.00019427807558838588, |
|
"loss": 0.4234, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.047680530697107315, |
|
"learning_rate": 0.00019415817290964883, |
|
"loss": 0.4155, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.05214262008666992, |
|
"learning_rate": 0.0001940370647279746, |
|
"loss": 0.4224, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.06332990527153015, |
|
"learning_rate": 0.00019391475259390584, |
|
"loss": 0.4233, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.05726313218474388, |
|
"learning_rate": 0.00019379123807339942, |
|
"loss": 0.4118, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.044936031103134155, |
|
"learning_rate": 0.00019366652274780628, |
|
"loss": 0.4296, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.05117325484752655, |
|
"learning_rate": 0.0001935406082138513, |
|
"loss": 0.4287, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.058542776852846146, |
|
"learning_rate": 0.00019341349608361267, |
|
"loss": 0.4213, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.056066304445266724, |
|
"learning_rate": 0.00019328518798450138, |
|
"loss": 0.4174, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.049762677401304245, |
|
"learning_rate": 0.00019315568555924035, |
|
"loss": 0.418, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.043821126222610474, |
|
"learning_rate": 0.00019302499046584348, |
|
"loss": 0.4012, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.05036221817135811, |
|
"learning_rate": 0.00019289310437759427, |
|
"loss": 0.4237, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.050889529287815094, |
|
"learning_rate": 0.00019276002898302447, |
|
"loss": 0.4144, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.04269757494330406, |
|
"learning_rate": 0.0001926257659858925, |
|
"loss": 0.4078, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.04927165433764458, |
|
"learning_rate": 0.00019249031710516162, |
|
"loss": 0.4155, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.05124311521649361, |
|
"learning_rate": 0.00019235368407497788, |
|
"loss": 0.3966, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.04073040187358856, |
|
"learning_rate": 0.00019221586864464786, |
|
"loss": 0.4064, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.04988453537225723, |
|
"learning_rate": 0.00019207687257861655, |
|
"loss": 0.4197, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.05227258801460266, |
|
"learning_rate": 0.0001919366976564444, |
|
"loss": 0.414, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.0466819666326046, |
|
"learning_rate": 0.00019179534567278475, |
|
"loss": 0.4173, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.047633491456508636, |
|
"learning_rate": 0.00019165281843736085, |
|
"loss": 0.4085, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.05280464142560959, |
|
"learning_rate": 0.00019150911777494258, |
|
"loss": 0.4051, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.052302148193120956, |
|
"learning_rate": 0.00019136424552532318, |
|
"loss": 0.42, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.04875241965055466, |
|
"learning_rate": 0.00019121820354329577, |
|
"loss": 0.4258, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.04654408246278763, |
|
"learning_rate": 0.0001910709936986293, |
|
"loss": 0.409, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.05745020881295204, |
|
"learning_rate": 0.00019092261787604492, |
|
"loss": 0.4059, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.06945241987705231, |
|
"learning_rate": 0.00019077307797519183, |
|
"loss": 0.4038, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.06346461176872253, |
|
"learning_rate": 0.00019062237591062272, |
|
"loss": 0.4031, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.058026187121868134, |
|
"learning_rate": 0.00019047051361176953, |
|
"loss": 0.4126, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.04755179584026337, |
|
"learning_rate": 0.0001903174930229185, |
|
"loss": 0.4209, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.05765068158507347, |
|
"learning_rate": 0.0001901633161031856, |
|
"loss": 0.4067, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.05687811225652695, |
|
"learning_rate": 0.000190007984826491, |
|
"loss": 0.3975, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.04930473491549492, |
|
"learning_rate": 0.0001898515011815343, |
|
"loss": 0.4146, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.05147051811218262, |
|
"learning_rate": 0.0001896938671717687, |
|
"loss": 0.4035, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.05680418014526367, |
|
"learning_rate": 0.0001895350848153754, |
|
"loss": 0.4049, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.0444297268986702, |
|
"learning_rate": 0.00018937515614523797, |
|
"loss": 0.4085, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.05083802342414856, |
|
"learning_rate": 0.00018921408320891612, |
|
"loss": 0.4036, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.04978756606578827, |
|
"learning_rate": 0.00018905186806861957, |
|
"loss": 0.4058, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.04963681101799011, |
|
"learning_rate": 0.00018888851280118155, |
|
"loss": 0.3977, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.0466095507144928, |
|
"learning_rate": 0.00018872401949803237, |
|
"loss": 0.3945, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.04972768574953079, |
|
"learning_rate": 0.00018855839026517257, |
|
"loss": 0.4151, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.054370637983083725, |
|
"learning_rate": 0.0001883916272231459, |
|
"loss": 0.3944, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.054699357599020004, |
|
"learning_rate": 0.00018822373250701224, |
|
"loss": 0.3989, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.054452769458293915, |
|
"learning_rate": 0.00018805470826632024, |
|
"loss": 0.3984, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.04596908017992973, |
|
"learning_rate": 0.00018788455666507981, |
|
"loss": 0.4018, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.054354868829250336, |
|
"learning_rate": 0.00018771327988173435, |
|
"loss": 0.3985, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.05570242181420326, |
|
"learning_rate": 0.00018754088010913304, |
|
"loss": 0.3818, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.054722048342227936, |
|
"learning_rate": 0.00018736735955450251, |
|
"loss": 0.4111, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.04620000347495079, |
|
"learning_rate": 0.00018719272043941882, |
|
"loss": 0.3949, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.048443205654621124, |
|
"learning_rate": 0.00018701696499977884, |
|
"loss": 0.3856, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.06628945469856262, |
|
"learning_rate": 0.00018684009548577168, |
|
"loss": 0.4048, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.05339967459440231, |
|
"learning_rate": 0.00018666211416184999, |
|
"loss": 0.3894, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.04650304839015007, |
|
"learning_rate": 0.00018648302330670082, |
|
"loss": 0.4004, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.05634591728448868, |
|
"learning_rate": 0.00018630282521321645, |
|
"loss": 0.4033, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.048666685819625854, |
|
"learning_rate": 0.00018612152218846513, |
|
"loss": 0.399, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.04597772657871246, |
|
"learning_rate": 0.0001859391165536615, |
|
"loss": 0.3931, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.0526028610765934, |
|
"learning_rate": 0.00018575561064413689, |
|
"loss": 0.3879, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.05867009237408638, |
|
"learning_rate": 0.00018557100680930937, |
|
"loss": 0.3905, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.05077454075217247, |
|
"learning_rate": 0.00018538530741265364, |
|
"loss": 0.395, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.0461389385163784, |
|
"learning_rate": 0.00018519851483167097, |
|
"loss": 0.4016, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.059010252356529236, |
|
"learning_rate": 0.00018501063145785846, |
|
"loss": 0.3823, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.06437338888645172, |
|
"learning_rate": 0.00018482165969667874, |
|
"loss": 0.3918, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.04585932940244675, |
|
"learning_rate": 0.00018463160196752887, |
|
"loss": 0.3808, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.05361521616578102, |
|
"learning_rate": 0.00018444046070370963, |
|
"loss": 0.3858, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.05653822794556618, |
|
"learning_rate": 0.00018424823835239417, |
|
"loss": 0.3785, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.04439689964056015, |
|
"learning_rate": 0.0001840549373745968, |
|
"loss": 0.3894, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.05564529448747635, |
|
"learning_rate": 0.00018386056024514137, |
|
"loss": 0.3883, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.06035888195037842, |
|
"learning_rate": 0.00018366510945262972, |
|
"loss": 0.3855, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.044238511472940445, |
|
"learning_rate": 0.0001834685874994098, |
|
"loss": 0.3934, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.050235260277986526, |
|
"learning_rate": 0.00018327099690154344, |
|
"loss": 0.3819, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.051336683332920074, |
|
"learning_rate": 0.00018307234018877434, |
|
"loss": 0.3897, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.045052576810121536, |
|
"learning_rate": 0.0001828726199044957, |
|
"loss": 0.3822, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.05162283405661583, |
|
"learning_rate": 0.00018267183860571753, |
|
"loss": 0.4047, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.0488157793879509, |
|
"learning_rate": 0.00018246999886303383, |
|
"loss": 0.3947, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.04454487934708595, |
|
"learning_rate": 0.00018226710326059006, |
|
"loss": 0.3942, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.0500001423060894, |
|
"learning_rate": 0.0001820631543960496, |
|
"loss": 0.3826, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.04919865354895592, |
|
"learning_rate": 0.00018185815488056076, |
|
"loss": 0.3791, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.04547140747308731, |
|
"learning_rate": 0.00018165210733872336, |
|
"loss": 0.3879, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.044622063636779785, |
|
"learning_rate": 0.00018144501440855496, |
|
"loss": 0.3778, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.04467932507395744, |
|
"learning_rate": 0.00018123687874145721, |
|
"loss": 0.3994, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.04281982406973839, |
|
"learning_rate": 0.0001810277030021819, |
|
"loss": 0.3817, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.05303504317998886, |
|
"learning_rate": 0.00018081748986879679, |
|
"loss": 0.3749, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.046573616564273834, |
|
"learning_rate": 0.00018060624203265134, |
|
"loss": 0.3866, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.044320229440927505, |
|
"learning_rate": 0.00018039396219834237, |
|
"loss": 0.3732, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.05708359181880951, |
|
"learning_rate": 0.00018018065308367912, |
|
"loss": 0.3863, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.045029208064079285, |
|
"learning_rate": 0.00017996631741964888, |
|
"loss": 0.3862, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.055195923894643784, |
|
"learning_rate": 0.00017975095795038165, |
|
"loss": 0.3835, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.048293352127075195, |
|
"learning_rate": 0.00017953457743311523, |
|
"loss": 0.374, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.04677055403590202, |
|
"learning_rate": 0.00017931717863815987, |
|
"loss": 0.377, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.04955766722559929, |
|
"learning_rate": 0.00017909876434886273, |
|
"loss": 0.3808, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.04526973515748978, |
|
"learning_rate": 0.00017887933736157233, |
|
"loss": 0.3796, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.043622203171253204, |
|
"learning_rate": 0.00017865890048560277, |
|
"loss": 0.376, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.046581387519836426, |
|
"learning_rate": 0.0001784374565431976, |
|
"loss": 0.3716, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.04433497413992882, |
|
"learning_rate": 0.00017821500836949386, |
|
"loss": 0.3715, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.04146367311477661, |
|
"learning_rate": 0.00017799155881248572, |
|
"loss": 0.3809, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.045288585126399994, |
|
"learning_rate": 0.000177767110732988, |
|
"loss": 0.3885, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.04070120304822922, |
|
"learning_rate": 0.00017754166700459958, |
|
"loss": 0.3713, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.042820919305086136, |
|
"learning_rate": 0.00017731523051366658, |
|
"loss": 0.3839, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.04416365176439285, |
|
"learning_rate": 0.00017708780415924539, |
|
"loss": 0.3728, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.04461952671408653, |
|
"learning_rate": 0.00017685939085306562, |
|
"loss": 0.373, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.04675828292965889, |
|
"learning_rate": 0.00017662999351949278, |
|
"loss": 0.3711, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.04258272796869278, |
|
"learning_rate": 0.00017639961509549078, |
|
"loss": 0.3782, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.04638506844639778, |
|
"learning_rate": 0.00017616825853058443, |
|
"loss": 0.3592, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.04781416058540344, |
|
"learning_rate": 0.00017593592678682166, |
|
"loss": 0.383, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.04813629388809204, |
|
"learning_rate": 0.00017570262283873552, |
|
"loss": 0.3775, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.046996332705020905, |
|
"learning_rate": 0.00017546834967330617, |
|
"loss": 0.3815, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.04889595881104469, |
|
"learning_rate": 0.00017523311028992268, |
|
"loss": 0.3636, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.04298345744609833, |
|
"learning_rate": 0.00017499690770034443, |
|
"loss": 0.3672, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.04219110682606697, |
|
"learning_rate": 0.00017475974492866278, |
|
"loss": 0.3801, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.051573265343904495, |
|
"learning_rate": 0.00017452162501126227, |
|
"loss": 0.3778, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.048954349011182785, |
|
"learning_rate": 0.00017428255099678167, |
|
"loss": 0.3849, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.042610183358192444, |
|
"learning_rate": 0.0001740425259460751, |
|
"loss": 0.3682, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.04517417773604393, |
|
"learning_rate": 0.00017380155293217264, |
|
"loss": 0.3827, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.04968888312578201, |
|
"learning_rate": 0.00017355963504024123, |
|
"loss": 0.3821, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.051313381642103195, |
|
"learning_rate": 0.0001733167753675449, |
|
"loss": 0.381, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.044351302087306976, |
|
"learning_rate": 0.0001730729770234054, |
|
"loss": 0.381, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.03970547392964363, |
|
"learning_rate": 0.00017282824312916218, |
|
"loss": 0.3698, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.04822370782494545, |
|
"learning_rate": 0.00017258257681813244, |
|
"loss": 0.3838, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.045927174389362335, |
|
"learning_rate": 0.0001723359812355712, |
|
"loss": 0.3662, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.042983219027519226, |
|
"learning_rate": 0.00017208845953863076, |
|
"loss": 0.3574, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.0422198586165905, |
|
"learning_rate": 0.0001718400148963206, |
|
"loss": 0.3559, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.042307570576667786, |
|
"learning_rate": 0.00017159065048946644, |
|
"loss": 0.3834, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.04701109230518341, |
|
"learning_rate": 0.0001713403695106698, |
|
"loss": 0.3718, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.04007503017783165, |
|
"learning_rate": 0.00017108917516426704, |
|
"loss": 0.3785, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.04560061916708946, |
|
"learning_rate": 0.00017083707066628832, |
|
"loss": 0.3713, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.04315731301903725, |
|
"learning_rate": 0.00017058405924441636, |
|
"loss": 0.3702, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.040260497480630875, |
|
"learning_rate": 0.0001703301441379453, |
|
"loss": 0.367, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.04882992431521416, |
|
"learning_rate": 0.000170075328597739, |
|
"loss": 0.3737, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.04410382732748985, |
|
"learning_rate": 0.0001698196158861896, |
|
"loss": 0.3625, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.04889338091015816, |
|
"learning_rate": 0.00016956300927717575, |
|
"loss": 0.3697, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.044603537768125534, |
|
"learning_rate": 0.00016930551205602043, |
|
"loss": 0.3729, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.0539550743997097, |
|
"learning_rate": 0.00016904712751944931, |
|
"loss": 0.3625, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.04753349721431732, |
|
"learning_rate": 0.00016878785897554818, |
|
"loss": 0.3662, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.04425463080406189, |
|
"learning_rate": 0.0001685277097437208, |
|
"loss": 0.3595, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.04160892590880394, |
|
"learning_rate": 0.0001682666831546463, |
|
"loss": 0.3679, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.04600003361701965, |
|
"learning_rate": 0.0001680047825502366, |
|
"loss": 0.3702, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.03887678310275078, |
|
"learning_rate": 0.00016774201128359357, |
|
"loss": 0.3633, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.0477156862616539, |
|
"learning_rate": 0.00016747837271896622, |
|
"loss": 0.3702, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.04244072362780571, |
|
"learning_rate": 0.00016721387023170737, |
|
"loss": 0.3668, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.04049496725201607, |
|
"learning_rate": 0.0001669485072082308, |
|
"loss": 0.3785, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.04432998597621918, |
|
"learning_rate": 0.00016668228704596756, |
|
"loss": 0.3703, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.0432085320353508, |
|
"learning_rate": 0.00016641521315332265, |
|
"loss": 0.3615, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.03820549696683884, |
|
"learning_rate": 0.00016614728894963135, |
|
"loss": 0.3483, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.04436295107007027, |
|
"learning_rate": 0.00016587851786511543, |
|
"loss": 0.3661, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.04371733218431473, |
|
"learning_rate": 0.00016560890334083926, |
|
"loss": 0.3503, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.039205193519592285, |
|
"learning_rate": 0.00016533844882866568, |
|
"loss": 0.3482, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.04308384284377098, |
|
"learning_rate": 0.00016506715779121187, |
|
"loss": 0.373, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.040143441408872604, |
|
"learning_rate": 0.00016479503370180507, |
|
"loss": 0.3609, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.03845199570059776, |
|
"learning_rate": 0.000164522080044438, |
|
"loss": 0.3644, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.039730221033096313, |
|
"learning_rate": 0.00016424830031372425, |
|
"loss": 0.3514, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.04021477699279785, |
|
"learning_rate": 0.00016397369801485366, |
|
"loss": 0.3566, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.03929613530635834, |
|
"learning_rate": 0.00016369827666354745, |
|
"loss": 0.3649, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.040966227650642395, |
|
"learning_rate": 0.0001634220397860129, |
|
"loss": 0.3661, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.036787159740924835, |
|
"learning_rate": 0.0001631449909188987, |
|
"loss": 0.3572, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.039864350110292435, |
|
"learning_rate": 0.00016286713360924918, |
|
"loss": 0.3593, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.039622630923986435, |
|
"learning_rate": 0.00016258847141445928, |
|
"loss": 0.3711, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.03840857744216919, |
|
"learning_rate": 0.00016230900790222878, |
|
"loss": 0.3537, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.03800921142101288, |
|
"learning_rate": 0.00016202874665051674, |
|
"loss": 0.3662, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.03894530236721039, |
|
"learning_rate": 0.0001617476912474956, |
|
"loss": 0.3633, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.04486812278628349, |
|
"learning_rate": 0.00016146584529150526, |
|
"loss": 0.3594, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.0394977331161499, |
|
"learning_rate": 0.00016118321239100712, |
|
"loss": 0.3473, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.03960977867245674, |
|
"learning_rate": 0.0001608997961645377, |
|
"loss": 0.363, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.04547852650284767, |
|
"learning_rate": 0.00016061560024066248, |
|
"loss": 0.3698, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.045081403106451035, |
|
"learning_rate": 0.00016033062825792935, |
|
"loss": 0.3684, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.03854916989803314, |
|
"learning_rate": 0.00016004488386482205, |
|
"loss": 0.3467, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.042972736060619354, |
|
"learning_rate": 0.0001597583707197134, |
|
"loss": 0.3589, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.0463341549038887, |
|
"learning_rate": 0.0001594710924908186, |
|
"loss": 0.3517, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.04469626024365425, |
|
"learning_rate": 0.00015918305285614822, |
|
"loss": 0.3509, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.037887342274188995, |
|
"learning_rate": 0.0001588942555034609, |
|
"loss": 0.3622, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.046203695237636566, |
|
"learning_rate": 0.00015860470413021642, |
|
"loss": 0.3587, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.043443821370601654, |
|
"learning_rate": 0.00015831440244352832, |
|
"loss": 0.3742, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.037403471767902374, |
|
"learning_rate": 0.00015802335416011625, |
|
"loss": 0.357, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.041037216782569885, |
|
"learning_rate": 0.00015773156300625857, |
|
"loss": 0.347, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.04024377465248108, |
|
"learning_rate": 0.00015743903271774455, |
|
"loss": 0.3584, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.04302544146776199, |
|
"learning_rate": 0.0001571457670398266, |
|
"loss": 0.3754, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.04049861803650856, |
|
"learning_rate": 0.00015685176972717223, |
|
"loss": 0.3617, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.04286986216902733, |
|
"learning_rate": 0.0001565570445438161, |
|
"loss": 0.3459, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.04489251226186752, |
|
"learning_rate": 0.00015626159526311174, |
|
"loss": 0.342, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.039040662348270416, |
|
"learning_rate": 0.00015596542566768327, |
|
"loss": 0.3579, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.041434600949287415, |
|
"learning_rate": 0.00015566853954937694, |
|
"loss": 0.3607, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.040113747119903564, |
|
"learning_rate": 0.00015537094070921267, |
|
"loss": 0.3581, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.047135982662439346, |
|
"learning_rate": 0.00015507263295733528, |
|
"loss": 0.355, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.04679419472813606, |
|
"learning_rate": 0.00015477362011296575, |
|
"loss": 0.3506, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.04256124794483185, |
|
"learning_rate": 0.00015447390600435238, |
|
"loss": 0.3547, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.041164420545101166, |
|
"learning_rate": 0.00015417349446872168, |
|
"loss": 0.3533, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.04434530436992645, |
|
"learning_rate": 0.00015387238935222927, |
|
"loss": 0.3655, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.042684029787778854, |
|
"learning_rate": 0.00015357059450991068, |
|
"loss": 0.3388, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.03664594888687134, |
|
"learning_rate": 0.00015326811380563204, |
|
"loss": 0.3415, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.044670287519693375, |
|
"learning_rate": 0.0001529649511120404, |
|
"loss": 0.3639, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.043318092823028564, |
|
"learning_rate": 0.00015266111031051442, |
|
"loss": 0.3705, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.03925277665257454, |
|
"learning_rate": 0.00015235659529111445, |
|
"loss": 0.3452, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.048936985433101654, |
|
"learning_rate": 0.00015205140995253283, |
|
"loss": 0.3511, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.04244200140237808, |
|
"learning_rate": 0.00015174555820204408, |
|
"loss": 0.3573, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.041375573724508286, |
|
"learning_rate": 0.00015143904395545466, |
|
"loss": 0.3474, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.04903846234083176, |
|
"learning_rate": 0.0001511318711370529, |
|
"loss": 0.3677, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.03968316316604614, |
|
"learning_rate": 0.0001508240436795589, |
|
"loss": 0.3504, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.038755469024181366, |
|
"learning_rate": 0.00015051556552407399, |
|
"loss": 0.3594, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.03975263237953186, |
|
"learning_rate": 0.00015020644062003046, |
|
"loss": 0.3536, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.03559111803770065, |
|
"learning_rate": 0.0001498966729251408, |
|
"loss": 0.3489, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.0421811044216156, |
|
"learning_rate": 0.0001495862664053471, |
|
"loss": 0.3558, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.04090610146522522, |
|
"learning_rate": 0.00014927522503477048, |
|
"loss": 0.3532, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.04141204059123993, |
|
"learning_rate": 0.00014896355279565976, |
|
"loss": 0.361, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.039628610014915466, |
|
"learning_rate": 0.00014865125367834092, |
|
"loss": 0.353, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.040805645287036896, |
|
"learning_rate": 0.00014833833168116582, |
|
"loss": 0.3442, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.03766808658838272, |
|
"learning_rate": 0.00014802479081046102, |
|
"loss": 0.3429, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.03960324451327324, |
|
"learning_rate": 0.00014771063508047636, |
|
"loss": 0.3641, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.03804844617843628, |
|
"learning_rate": 0.0001473958685133339, |
|
"loss": 0.3462, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.03872362896800041, |
|
"learning_rate": 0.0001470804951389761, |
|
"loss": 0.3529, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.038420502096414566, |
|
"learning_rate": 0.00014676451899511437, |
|
"loss": 0.3594, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.038893669843673706, |
|
"learning_rate": 0.00014644794412717736, |
|
"loss": 0.3461, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.039783939719200134, |
|
"learning_rate": 0.00014613077458825913, |
|
"loss": 0.3641, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.040646251291036606, |
|
"learning_rate": 0.0001458130144390673, |
|
"loss": 0.3408, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.03997185081243515, |
|
"learning_rate": 0.00014549466774787108, |
|
"loss": 0.3476, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.03733928129076958, |
|
"learning_rate": 0.00014517573859044907, |
|
"loss": 0.348, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.04240603372454643, |
|
"learning_rate": 0.00014485623105003732, |
|
"loss": 0.3526, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.04083820432424545, |
|
"learning_rate": 0.00014453614921727668, |
|
"loss": 0.358, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.038078900426626205, |
|
"learning_rate": 0.00014421549719016081, |
|
"loss": 0.3383, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.043162036687135696, |
|
"learning_rate": 0.00014389427907398342, |
|
"loss": 0.3502, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.04186828061938286, |
|
"learning_rate": 0.00014357249898128594, |
|
"loss": 0.355, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.041309986263513565, |
|
"learning_rate": 0.0001432501610318047, |
|
"loss": 0.3634, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.03907867148518562, |
|
"learning_rate": 0.00014292726935241827, |
|
"loss": 0.3431, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.04641426354646683, |
|
"learning_rate": 0.00014260382807709457, |
|
"loss": 0.3423, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.042708221822977066, |
|
"learning_rate": 0.000142279841346838, |
|
"loss": 0.3459, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.03900787606835365, |
|
"learning_rate": 0.00014195531330963635, |
|
"loss": 0.349, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.04202708601951599, |
|
"learning_rate": 0.0001416302481204078, |
|
"loss": 0.3478, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.04631033539772034, |
|
"learning_rate": 0.0001413046499409477, |
|
"loss": 0.3495, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.04463302344083786, |
|
"learning_rate": 0.00014097852293987507, |
|
"loss": 0.3422, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.03706689178943634, |
|
"learning_rate": 0.00014065187129257964, |
|
"loss": 0.3558, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.04193353280425072, |
|
"learning_rate": 0.000140324699181168, |
|
"loss": 0.3539, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.03598970174789429, |
|
"learning_rate": 0.00013999701079441028, |
|
"loss": 0.3412, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.04364067688584328, |
|
"learning_rate": 0.00013966881032768643, |
|
"loss": 0.3496, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.034331586211919785, |
|
"learning_rate": 0.00013934010198293257, |
|
"loss": 0.3495, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.04461033269762993, |
|
"learning_rate": 0.00013901088996858717, |
|
"loss": 0.3555, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.037957970052957535, |
|
"learning_rate": 0.0001386811784995371, |
|
"loss": 0.3535, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.04143986850976944, |
|
"learning_rate": 0.0001383509717970638, |
|
"loss": 0.3552, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.04176801070570946, |
|
"learning_rate": 0.0001380202740887891, |
|
"loss": 0.3462, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.038572393357753754, |
|
"learning_rate": 0.00013768908960862123, |
|
"loss": 0.3582, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.047118738293647766, |
|
"learning_rate": 0.0001373574225967004, |
|
"loss": 0.3451, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.042480722069740295, |
|
"learning_rate": 0.00013702527729934482, |
|
"loss": 0.3355, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.04277138039469719, |
|
"learning_rate": 0.00013669265796899607, |
|
"loss": 0.3395, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.04838969558477402, |
|
"learning_rate": 0.0001363595688641648, |
|
"loss": 0.3419, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.04388457536697388, |
|
"learning_rate": 0.00013602601424937604, |
|
"loss": 0.3412, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.041595809161663055, |
|
"learning_rate": 0.00013569199839511494, |
|
"loss": 0.3488, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.045120954513549805, |
|
"learning_rate": 0.0001353575255777717, |
|
"loss": 0.3486, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.04108583182096481, |
|
"learning_rate": 0.00013502260007958706, |
|
"loss": 0.3522, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.046396516263484955, |
|
"learning_rate": 0.00013468722618859743, |
|
"loss": 0.3474, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.04111646115779877, |
|
"learning_rate": 0.0001343514081985799, |
|
"loss": 0.3522, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.03970329090952873, |
|
"learning_rate": 0.00013401515040899746, |
|
"loss": 0.3504, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.041780147701501846, |
|
"learning_rate": 0.00013367845712494372, |
|
"loss": 0.3464, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.03978356719017029, |
|
"learning_rate": 0.000133341332657088, |
|
"loss": 0.3473, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.04411393776535988, |
|
"learning_rate": 0.00013300378132161992, |
|
"loss": 0.3451, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.04426994547247887, |
|
"learning_rate": 0.00013266580744019445, |
|
"loss": 0.3366, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.037810515612363815, |
|
"learning_rate": 0.00013232741533987622, |
|
"loss": 0.342, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.04545320197939873, |
|
"learning_rate": 0.00013198860935308444, |
|
"loss": 0.3553, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.04098277539014816, |
|
"learning_rate": 0.00013164939381753713, |
|
"loss": 0.3458, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.04471420869231224, |
|
"learning_rate": 0.00013130977307619594, |
|
"loss": 0.3318, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.038031481206417084, |
|
"learning_rate": 0.00013096975147721017, |
|
"loss": 0.3533, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.0440196692943573, |
|
"learning_rate": 0.00013062933337386142, |
|
"loss": 0.3299, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.034114640206098557, |
|
"learning_rate": 0.00013028852312450763, |
|
"loss": 0.3401, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.042403046041727066, |
|
"learning_rate": 0.00012994732509252744, |
|
"loss": 0.3458, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.0356055311858654, |
|
"learning_rate": 0.00012960574364626412, |
|
"loss": 0.3498, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.040617913007736206, |
|
"learning_rate": 0.00012926378315896998, |
|
"loss": 0.3454, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.03738779574632645, |
|
"learning_rate": 0.00012892144800875, |
|
"loss": 0.342, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.039146389812231064, |
|
"learning_rate": 0.00012857874257850605, |
|
"loss": 0.35, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.03738423436880112, |
|
"learning_rate": 0.0001282356712558806, |
|
"loss": 0.3503, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.03822367265820503, |
|
"learning_rate": 0.00012789223843320073, |
|
"loss": 0.3489, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.03619171679019928, |
|
"learning_rate": 0.00012754844850742172, |
|
"loss": 0.3392, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.03672062233090401, |
|
"learning_rate": 0.00012720430588007077, |
|
"loss": 0.3503, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.038667939603328705, |
|
"learning_rate": 0.00012685981495719087, |
|
"loss": 0.3582, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.033767540007829666, |
|
"learning_rate": 0.00012651498014928402, |
|
"loss": 0.3465, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.0382758267223835, |
|
"learning_rate": 0.00012616980587125512, |
|
"loss": 0.3435, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.03528289496898651, |
|
"learning_rate": 0.00012582429654235523, |
|
"loss": 0.3427, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.03565202280879021, |
|
"learning_rate": 0.00012547845658612508, |
|
"loss": 0.3453, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.03661198914051056, |
|
"learning_rate": 0.0001251322904303383, |
|
"loss": 0.3341, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.03750570863485336, |
|
"learning_rate": 0.00012478580250694504, |
|
"loss": 0.3375, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.03714427724480629, |
|
"learning_rate": 0.00012443899725201482, |
|
"loss": 0.3447, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.03938335180282593, |
|
"learning_rate": 0.0001240918791056801, |
|
"loss": 0.3551, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.037106383591890335, |
|
"learning_rate": 0.00012374445251207914, |
|
"loss": 0.3365, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.039744965732097626, |
|
"learning_rate": 0.00012339672191929936, |
|
"loss": 0.3333, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.04372095316648483, |
|
"learning_rate": 0.0001230486917793202, |
|
"loss": 0.344, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.03674423322081566, |
|
"learning_rate": 0.00012270036654795613, |
|
"loss": 0.3269, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.03725577890872955, |
|
"learning_rate": 0.00012235175068479984, |
|
"loss": 0.3394, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.037114035338163376, |
|
"learning_rate": 0.00012200284865316475, |
|
"loss": 0.3411, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.037100065499544144, |
|
"learning_rate": 0.00012165366492002832, |
|
"loss": 0.3351, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.042357347905635834, |
|
"learning_rate": 0.00012130420395597437, |
|
"loss": 0.3458, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.03712886944413185, |
|
"learning_rate": 0.0001209544702351363, |
|
"loss": 0.3385, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.03989335149526596, |
|
"learning_rate": 0.00012060446823513949, |
|
"loss": 0.3431, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.03781385347247124, |
|
"learning_rate": 0.0001202542024370441, |
|
"loss": 0.3444, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.03825078904628754, |
|
"learning_rate": 0.00011990367732528773, |
|
"loss": 0.3394, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.03969776630401611, |
|
"learning_rate": 0.00011955289738762796, |
|
"loss": 0.3444, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.04011238366365433, |
|
"learning_rate": 0.00011920186711508479, |
|
"loss": 0.3466, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.03450911492109299, |
|
"learning_rate": 0.00011885059100188341, |
|
"loss": 0.337, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.038461893796920776, |
|
"learning_rate": 0.00011849907354539633, |
|
"loss": 0.3347, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.037344567477703094, |
|
"learning_rate": 0.00011814731924608616, |
|
"loss": 0.3452, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.03558855876326561, |
|
"learning_rate": 0.00011779533260744757, |
|
"loss": 0.3401, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.04183749854564667, |
|
"learning_rate": 0.00011744311813595006, |
|
"loss": 0.3421, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.03574857488274574, |
|
"learning_rate": 0.00011709068034097997, |
|
"loss": 0.3271, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.03993195295333862, |
|
"learning_rate": 0.0001167380237347828, |
|
"loss": 0.3267, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.03541812300682068, |
|
"learning_rate": 0.0001163851528324056, |
|
"loss": 0.3347, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.037207312881946564, |
|
"learning_rate": 0.00011603207215163894, |
|
"loss": 0.3418, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.03919894993305206, |
|
"learning_rate": 0.00011567878621295922, |
|
"loss": 0.3516, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.0382562056183815, |
|
"learning_rate": 0.00011532529953947075, |
|
"loss": 0.3499, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.03956792503595352, |
|
"learning_rate": 0.00011497161665684784, |
|
"loss": 0.347, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.03666992112994194, |
|
"learning_rate": 0.0001146177420932768, |
|
"loss": 0.3292, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.03818726912140846, |
|
"learning_rate": 0.00011426368037939813, |
|
"loss": 0.346, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.0352170504629612, |
|
"learning_rate": 0.00011390943604824826, |
|
"loss": 0.3253, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.04157968610525131, |
|
"learning_rate": 0.00011355501363520185, |
|
"loss": 0.3411, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.037365157157182693, |
|
"learning_rate": 0.00011320041767791336, |
|
"loss": 0.329, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.03966519236564636, |
|
"learning_rate": 0.00011284565271625922, |
|
"loss": 0.3412, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.037126705050468445, |
|
"learning_rate": 0.00011249072329227959, |
|
"loss": 0.3387, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.03643345460295677, |
|
"learning_rate": 0.00011213563395012026, |
|
"loss": 0.3392, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.03495652228593826, |
|
"learning_rate": 0.0001117803892359744, |
|
"loss": 0.3327, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.03979011997580528, |
|
"learning_rate": 0.00011142499369802444, |
|
"loss": 0.3298, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.03582910820841789, |
|
"learning_rate": 0.00011106945188638378, |
|
"loss": 0.3424, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.03996811434626579, |
|
"learning_rate": 0.00011071376835303858, |
|
"loss": 0.3335, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.039901167154312134, |
|
"learning_rate": 0.00011035794765178941, |
|
"loss": 0.3333, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.03835071623325348, |
|
"learning_rate": 0.00011000199433819305, |
|
"loss": 0.328, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.03873496130108833, |
|
"learning_rate": 0.00010964591296950406, |
|
"loss": 0.3414, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.03686109930276871, |
|
"learning_rate": 0.00010928970810461652, |
|
"loss": 0.3393, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.038315027952194214, |
|
"learning_rate": 0.00010893338430400562, |
|
"loss": 0.3349, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.036203257739543915, |
|
"learning_rate": 0.0001085769461296692, |
|
"loss": 0.332, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.037686560302972794, |
|
"learning_rate": 0.00010822039814506964, |
|
"loss": 0.3437, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.0374578982591629, |
|
"learning_rate": 0.00010786374491507494, |
|
"loss": 0.3424, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.03514384478330612, |
|
"learning_rate": 0.00010750699100590076, |
|
"loss": 0.3317, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.040791869163513184, |
|
"learning_rate": 0.00010715014098505162, |
|
"loss": 0.3389, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.03556138649582863, |
|
"learning_rate": 0.00010679319942126264, |
|
"loss": 0.3424, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.040588121861219406, |
|
"learning_rate": 0.00010643617088444094, |
|
"loss": 0.3418, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.03443857282400131, |
|
"learning_rate": 0.0001060790599456071, |
|
"loss": 0.3395, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.038418177515268326, |
|
"learning_rate": 0.00010572187117683674, |
|
"loss": 0.3325, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.03434626758098602, |
|
"learning_rate": 0.0001053646091512019, |
|
"loss": 0.3378, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.03498569130897522, |
|
"learning_rate": 0.0001050072784427126, |
|
"loss": 0.3375, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.03690873831510544, |
|
"learning_rate": 0.00010464988362625812, |
|
"loss": 0.331, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.040180571377277374, |
|
"learning_rate": 0.00010429242927754854, |
|
"loss": 0.3394, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.03546585142612457, |
|
"learning_rate": 0.00010393491997305613, |
|
"loss": 0.3368, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.03656655550003052, |
|
"learning_rate": 0.00010357736028995677, |
|
"loss": 0.332, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.03666246309876442, |
|
"learning_rate": 0.00010321975480607129, |
|
"loss": 0.3342, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.03500663861632347, |
|
"learning_rate": 0.00010286210809980697, |
|
"loss": 0.3389, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.03510987013578415, |
|
"learning_rate": 0.0001025044247500988, |
|
"loss": 0.3338, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.037753257900476456, |
|
"learning_rate": 0.00010214670933635095, |
|
"loss": 0.333, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.03440658003091812, |
|
"learning_rate": 0.00010178896643837809, |
|
"loss": 0.3456, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.0362187922000885, |
|
"learning_rate": 0.00010143120063634681, |
|
"loss": 0.3484, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.034865692257881165, |
|
"learning_rate": 0.00010107341651071684, |
|
"loss": 0.3392, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.035264838486909866, |
|
"learning_rate": 0.00010071561864218262, |
|
"loss": 0.3267, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.035725150257349014, |
|
"learning_rate": 0.00010035781161161446, |
|
"loss": 0.3454, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.03839932009577751, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3304, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.03366202488541603, |
|
"learning_rate": 9.964218838838554e-05, |
|
"loss": 0.3288, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.03588353097438812, |
|
"learning_rate": 9.92843813578174e-05, |
|
"loss": 0.3406, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.034711651504039764, |
|
"learning_rate": 9.892658348928316e-05, |
|
"loss": 0.3289, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.03611205145716667, |
|
"learning_rate": 9.856879936365321e-05, |
|
"loss": 0.3294, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.03515039384365082, |
|
"learning_rate": 9.821103356162189e-05, |
|
"loss": 0.3324, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.03375555947422981, |
|
"learning_rate": 9.785329066364906e-05, |
|
"loss": 0.3214, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.03730206564068794, |
|
"learning_rate": 9.749557524990121e-05, |
|
"loss": 0.3258, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.033822208642959595, |
|
"learning_rate": 9.713789190019304e-05, |
|
"loss": 0.3353, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.03683510422706604, |
|
"learning_rate": 9.678024519392871e-05, |
|
"loss": 0.3264, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.037592969834804535, |
|
"learning_rate": 9.642263971004324e-05, |
|
"loss": 0.329, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.03537925332784653, |
|
"learning_rate": 9.606508002694386e-05, |
|
"loss": 0.3448, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.033904388546943665, |
|
"learning_rate": 9.570757072245147e-05, |
|
"loss": 0.3472, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.036394666880369186, |
|
"learning_rate": 9.535011637374189e-05, |
|
"loss": 0.3288, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.03391895070672035, |
|
"learning_rate": 9.499272155728742e-05, |
|
"loss": 0.3368, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.037903424352407455, |
|
"learning_rate": 9.463539084879809e-05, |
|
"loss": 0.3443, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.033752232789993286, |
|
"learning_rate": 9.427812882316329e-05, |
|
"loss": 0.3316, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.03620493784546852, |
|
"learning_rate": 9.392094005439291e-05, |
|
"loss": 0.3318, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.0346236452460289, |
|
"learning_rate": 9.356382911555907e-05, |
|
"loss": 0.3438, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.03393540903925896, |
|
"learning_rate": 9.320680057873735e-05, |
|
"loss": 0.3459, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.03790227323770523, |
|
"learning_rate": 9.28498590149484e-05, |
|
"loss": 0.3405, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.03373325988650322, |
|
"learning_rate": 9.249300899409924e-05, |
|
"loss": 0.3329, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.03829934075474739, |
|
"learning_rate": 9.213625508492508e-05, |
|
"loss": 0.3321, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.03592018410563469, |
|
"learning_rate": 9.177960185493036e-05, |
|
"loss": 0.3186, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.03398677706718445, |
|
"learning_rate": 9.142305387033081e-05, |
|
"loss": 0.3465, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.03493371605873108, |
|
"learning_rate": 9.106661569599442e-05, |
|
"loss": 0.3358, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.03722028434276581, |
|
"learning_rate": 9.071029189538353e-05, |
|
"loss": 0.3278, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.033628251403570175, |
|
"learning_rate": 9.035408703049596e-05, |
|
"loss": 0.3315, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.036100320518016815, |
|
"learning_rate": 8.9998005661807e-05, |
|
"loss": 0.322, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.03419538959860802, |
|
"learning_rate": 8.96420523482106e-05, |
|
"loss": 0.3241, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.03728823736310005, |
|
"learning_rate": 8.928623164696146e-05, |
|
"loss": 0.3312, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.03538035228848457, |
|
"learning_rate": 8.893054811361624e-05, |
|
"loss": 0.3244, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.036142975091934204, |
|
"learning_rate": 8.85750063019756e-05, |
|
"loss": 0.3401, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.0373205728828907, |
|
"learning_rate": 8.821961076402563e-05, |
|
"loss": 0.3334, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.035391587764024734, |
|
"learning_rate": 8.786436604987978e-05, |
|
"loss": 0.341, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.03749712184071541, |
|
"learning_rate": 8.750927670772044e-05, |
|
"loss": 0.3179, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.037055715918540955, |
|
"learning_rate": 8.715434728374083e-05, |
|
"loss": 0.3226, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.03762289509177208, |
|
"learning_rate": 8.679958232208668e-05, |
|
"loss": 0.337, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.03537070378661156, |
|
"learning_rate": 8.644498636479819e-05, |
|
"loss": 0.3306, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.03710364177823067, |
|
"learning_rate": 8.609056395175175e-05, |
|
"loss": 0.3256, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.03595058619976044, |
|
"learning_rate": 8.573631962060192e-05, |
|
"loss": 0.3337, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.03377842903137207, |
|
"learning_rate": 8.538225790672322e-05, |
|
"loss": 0.3411, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.036737628281116486, |
|
"learning_rate": 8.50283833431522e-05, |
|
"loss": 0.3286, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.034495435655117035, |
|
"learning_rate": 8.467470046052927e-05, |
|
"loss": 0.3345, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.036502912640571594, |
|
"learning_rate": 8.432121378704081e-05, |
|
"loss": 0.3237, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.03458232432603836, |
|
"learning_rate": 8.396792784836108e-05, |
|
"loss": 0.3215, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.036796458065509796, |
|
"learning_rate": 8.361484716759445e-05, |
|
"loss": 0.3375, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.03392602503299713, |
|
"learning_rate": 8.326197626521723e-05, |
|
"loss": 0.335, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.03623703867197037, |
|
"learning_rate": 8.290931965902008e-05, |
|
"loss": 0.3388, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.03508217632770538, |
|
"learning_rate": 8.255688186404996e-05, |
|
"loss": 0.3267, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.03563014790415764, |
|
"learning_rate": 8.220466739255244e-05, |
|
"loss": 0.3277, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.035729020833969116, |
|
"learning_rate": 8.185268075391388e-05, |
|
"loss": 0.3348, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.034020714461803436, |
|
"learning_rate": 8.150092645460366e-05, |
|
"loss": 0.32, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.033443402498960495, |
|
"learning_rate": 8.114940899811662e-05, |
|
"loss": 0.3402, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.03387276828289032, |
|
"learning_rate": 8.079813288491521e-05, |
|
"loss": 0.3319, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.033886682242155075, |
|
"learning_rate": 8.044710261237207e-05, |
|
"loss": 0.3307, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.03618338331580162, |
|
"learning_rate": 8.009632267471227e-05, |
|
"loss": 0.3286, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.03527137264609337, |
|
"learning_rate": 7.974579756295591e-05, |
|
"loss": 0.3171, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.036220818758010864, |
|
"learning_rate": 7.939553176486052e-05, |
|
"loss": 0.3396, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.035215239971876144, |
|
"learning_rate": 7.904552976486372e-05, |
|
"loss": 0.3226, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.0399019829928875, |
|
"learning_rate": 7.869579604402562e-05, |
|
"loss": 0.3245, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.03600882738828659, |
|
"learning_rate": 7.83463350799717e-05, |
|
"loss": 0.3217, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.04228314757347107, |
|
"learning_rate": 7.799715134683523e-05, |
|
"loss": 0.3282, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.03412134200334549, |
|
"learning_rate": 7.764824931520018e-05, |
|
"loss": 0.3297, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.03876680135726929, |
|
"learning_rate": 7.729963345204386e-05, |
|
"loss": 0.3232, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.034336164593696594, |
|
"learning_rate": 7.695130822067984e-05, |
|
"loss": 0.32, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.0393151193857193, |
|
"learning_rate": 7.660327808070064e-05, |
|
"loss": 0.3321, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.03484569489955902, |
|
"learning_rate": 7.625554748792085e-05, |
|
"loss": 0.3315, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.042867034673690796, |
|
"learning_rate": 7.59081208943199e-05, |
|
"loss": 0.3294, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.03290384262800217, |
|
"learning_rate": 7.556100274798519e-05, |
|
"loss": 0.3284, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.042282383888959885, |
|
"learning_rate": 7.521419749305497e-05, |
|
"loss": 0.3304, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.03494780510663986, |
|
"learning_rate": 7.486770956966171e-05, |
|
"loss": 0.3378, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.04240773990750313, |
|
"learning_rate": 7.452154341387493e-05, |
|
"loss": 0.3325, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.037106744945049286, |
|
"learning_rate": 7.417570345764481e-05, |
|
"loss": 0.3267, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.0382254458963871, |
|
"learning_rate": 7.383019412874489e-05, |
|
"loss": 0.3225, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.037587832659482956, |
|
"learning_rate": 7.348501985071603e-05, |
|
"loss": 0.3197, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.03936656191945076, |
|
"learning_rate": 7.314018504280917e-05, |
|
"loss": 0.3268, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.037244342267513275, |
|
"learning_rate": 7.279569411992926e-05, |
|
"loss": 0.3254, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.03609207645058632, |
|
"learning_rate": 7.24515514925783e-05, |
|
"loss": 0.3388, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.03564932942390442, |
|
"learning_rate": 7.210776156679931e-05, |
|
"loss": 0.3307, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.03292916342616081, |
|
"learning_rate": 7.176432874411941e-05, |
|
"loss": 0.3257, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.0374549925327301, |
|
"learning_rate": 7.1421257421494e-05, |
|
"loss": 0.3417, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.03253106027841568, |
|
"learning_rate": 7.107855199125002e-05, |
|
"loss": 0.3397, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.035386502742767334, |
|
"learning_rate": 7.073621684103007e-05, |
|
"loss": 0.3329, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.032128430902957916, |
|
"learning_rate": 7.039425635373589e-05, |
|
"loss": 0.3314, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.03548587113618851, |
|
"learning_rate": 7.005267490747263e-05, |
|
"loss": 0.3267, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.034080877900123596, |
|
"learning_rate": 6.971147687549239e-05, |
|
"loss": 0.3276, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.03397037461400032, |
|
"learning_rate": 6.937066662613863e-05, |
|
"loss": 0.3326, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.03224964439868927, |
|
"learning_rate": 6.903024852278985e-05, |
|
"loss": 0.3159, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.0338578037917614, |
|
"learning_rate": 6.869022692380411e-05, |
|
"loss": 0.3276, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.03182509168982506, |
|
"learning_rate": 6.835060618246289e-05, |
|
"loss": 0.3293, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.03487447649240494, |
|
"learning_rate": 6.801139064691562e-05, |
|
"loss": 0.3266, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.03520505875349045, |
|
"learning_rate": 6.76725846601238e-05, |
|
"loss": 0.3172, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.0320887491106987, |
|
"learning_rate": 6.733419255980559e-05, |
|
"loss": 0.3267, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.03487367928028107, |
|
"learning_rate": 6.699621867838008e-05, |
|
"loss": 0.3243, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.03125692531466484, |
|
"learning_rate": 6.665866734291205e-05, |
|
"loss": 0.3196, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.0348886102437973, |
|
"learning_rate": 6.63215428750563e-05, |
|
"loss": 0.3331, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.03260529041290283, |
|
"learning_rate": 6.598484959100257e-05, |
|
"loss": 0.3217, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.03670089319348335, |
|
"learning_rate": 6.564859180142012e-05, |
|
"loss": 0.313, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.03376466780900955, |
|
"learning_rate": 6.53127738114026e-05, |
|
"loss": 0.332, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.0370427742600441, |
|
"learning_rate": 6.497739992041296e-05, |
|
"loss": 0.3107, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.0331474132835865, |
|
"learning_rate": 6.46424744222283e-05, |
|
"loss": 0.3226, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.03344728797674179, |
|
"learning_rate": 6.430800160488508e-05, |
|
"loss": 0.3164, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.03475061431527138, |
|
"learning_rate": 6.397398575062396e-05, |
|
"loss": 0.3283, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.03373516723513603, |
|
"learning_rate": 6.364043113583524e-05, |
|
"loss": 0.3245, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.03636647015810013, |
|
"learning_rate": 6.330734203100394e-05, |
|
"loss": 0.3344, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.03600539267063141, |
|
"learning_rate": 6.297472270065519e-05, |
|
"loss": 0.3211, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.03470810502767563, |
|
"learning_rate": 6.26425774032996e-05, |
|
"loss": 0.3227, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.03501768782734871, |
|
"learning_rate": 6.231091039137881e-05, |
|
"loss": 0.3402, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.03468218818306923, |
|
"learning_rate": 6.19797259112109e-05, |
|
"loss": 0.3404, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.03609640896320343, |
|
"learning_rate": 6.164902820293621e-05, |
|
"loss": 0.3288, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.03670278191566467, |
|
"learning_rate": 6.131882150046291e-05, |
|
"loss": 0.3202, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.036678191274404526, |
|
"learning_rate": 6.0989110031412856e-05, |
|
"loss": 0.3256, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.03913621976971626, |
|
"learning_rate": 6.065989801706744e-05, |
|
"loss": 0.3219, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.03633202239871025, |
|
"learning_rate": 6.033118967231359e-05, |
|
"loss": 0.3227, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.03726629912853241, |
|
"learning_rate": 6.0002989205589734e-05, |
|
"loss": 0.3221, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.03927043080329895, |
|
"learning_rate": 5.9675300818832016e-05, |
|
"loss": 0.333, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.032321300357580185, |
|
"learning_rate": 5.934812870742036e-05, |
|
"loss": 0.3251, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.039472486823797226, |
|
"learning_rate": 5.9021477060124954e-05, |
|
"loss": 0.3333, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.03381982445716858, |
|
"learning_rate": 5.869535005905232e-05, |
|
"loss": 0.3456, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.03533458709716797, |
|
"learning_rate": 5.836975187959223e-05, |
|
"loss": 0.3283, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.03490634635090828, |
|
"learning_rate": 5.804468669036369e-05, |
|
"loss": 0.3259, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.033531077206134796, |
|
"learning_rate": 5.772015865316205e-05, |
|
"loss": 0.3236, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.03854377195239067, |
|
"learning_rate": 5.739617192290545e-05, |
|
"loss": 0.3196, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.03259904310107231, |
|
"learning_rate": 5.707273064758178e-05, |
|
"loss": 0.322, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.034173525869846344, |
|
"learning_rate": 5.6749838968195326e-05, |
|
"loss": 0.3316, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.03369203209877014, |
|
"learning_rate": 5.642750101871408e-05, |
|
"loss": 0.316, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.034140925854444504, |
|
"learning_rate": 5.610572092601659e-05, |
|
"loss": 0.3129, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.034850213676691055, |
|
"learning_rate": 5.578450280983924e-05, |
|
"loss": 0.3429, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.036979470402002335, |
|
"learning_rate": 5.5463850782723346e-05, |
|
"loss": 0.316, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.03544802591204643, |
|
"learning_rate": 5.51437689499627e-05, |
|
"loss": 0.3234, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.03452309966087341, |
|
"learning_rate": 5.48242614095509e-05, |
|
"loss": 0.3299, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.032840970903635025, |
|
"learning_rate": 5.450533225212896e-05, |
|
"loss": 0.3274, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.034303292632102966, |
|
"learning_rate": 5.418698556093271e-05, |
|
"loss": 0.3326, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.03548911213874817, |
|
"learning_rate": 5.3869225411740884e-05, |
|
"loss": 0.3369, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.033440180122852325, |
|
"learning_rate": 5.3552055872822636e-05, |
|
"loss": 0.3335, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.0349792055785656, |
|
"learning_rate": 5.323548100488565e-05, |
|
"loss": 0.3079, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.03338850289583206, |
|
"learning_rate": 5.2919504861023903e-05, |
|
"loss": 0.3105, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.03373012691736221, |
|
"learning_rate": 5.260413148666614e-05, |
|
"loss": 0.3267, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.034098975360393524, |
|
"learning_rate": 5.228936491952363e-05, |
|
"loss": 0.3167, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.0328991673886776, |
|
"learning_rate": 5.197520918953904e-05, |
|
"loss": 0.3325, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.033464428037405014, |
|
"learning_rate": 5.16616683188342e-05, |
|
"loss": 0.3318, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.03465236350893974, |
|
"learning_rate": 5.134874632165913e-05, |
|
"loss": 0.3275, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.03109387867152691, |
|
"learning_rate": 5.103644720434027e-05, |
|
"loss": 0.3232, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.035864707082509995, |
|
"learning_rate": 5.072477496522958e-05, |
|
"loss": 0.3095, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.03278312832117081, |
|
"learning_rate": 5.041373359465289e-05, |
|
"loss": 0.3202, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.03266187012195587, |
|
"learning_rate": 5.010332707485925e-05, |
|
"loss": 0.3188, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.036194685846567154, |
|
"learning_rate": 4.9793559379969566e-05, |
|
"loss": 0.3143, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.0343778058886528, |
|
"learning_rate": 4.9484434475925976e-05, |
|
"loss": 0.3204, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.03244636580348015, |
|
"learning_rate": 4.917595632044113e-05, |
|
"loss": 0.3252, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.036536213010549545, |
|
"learning_rate": 4.8868128862947115e-05, |
|
"loss": 0.313, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.0331181101500988, |
|
"learning_rate": 4.856095604454539e-05, |
|
"loss": 0.328, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.03526446595788002, |
|
"learning_rate": 4.825444179795589e-05, |
|
"loss": 0.3218, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.03498705476522446, |
|
"learning_rate": 4.7948590047467153e-05, |
|
"loss": 0.3243, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.03270057961344719, |
|
"learning_rate": 4.764340470888556e-05, |
|
"loss": 0.3184, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.033852964639663696, |
|
"learning_rate": 4.7338889689485624e-05, |
|
"loss": 0.3219, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.03443654999136925, |
|
"learning_rate": 4.703504888795958e-05, |
|
"loss": 0.3187, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.03217703476548195, |
|
"learning_rate": 4.673188619436798e-05, |
|
"loss": 0.3208, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.033705055713653564, |
|
"learning_rate": 4.6429405490089306e-05, |
|
"loss": 0.323, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.033967047929763794, |
|
"learning_rate": 4.6127610647770767e-05, |
|
"loss": 0.3258, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.031915146857500076, |
|
"learning_rate": 4.582650553127835e-05, |
|
"loss": 0.3117, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.033060990273952484, |
|
"learning_rate": 4.552609399564762e-05, |
|
"loss": 0.3203, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.032012149691581726, |
|
"learning_rate": 4.522637988703424e-05, |
|
"loss": 0.333, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.033706024289131165, |
|
"learning_rate": 4.492736704266475e-05, |
|
"loss": 0.3233, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.032826974987983704, |
|
"learning_rate": 4.462905929078734e-05, |
|
"loss": 0.3208, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.03313816711306572, |
|
"learning_rate": 4.4331460450623064e-05, |
|
"loss": 0.3067, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.032175060361623764, |
|
"learning_rate": 4.403457433231674e-05, |
|
"loss": 0.3259, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.03576289862394333, |
|
"learning_rate": 4.373840473688829e-05, |
|
"loss": 0.321, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.03244362026453018, |
|
"learning_rate": 4.344295545618392e-05, |
|
"loss": 0.3074, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.03456572815775871, |
|
"learning_rate": 4.3148230272827784e-05, |
|
"loss": 0.3212, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.037079472094774246, |
|
"learning_rate": 4.285423296017341e-05, |
|
"loss": 0.3389, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.03216618299484253, |
|
"learning_rate": 4.256096728225548e-05, |
|
"loss": 0.314, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.035881418734788895, |
|
"learning_rate": 4.2268436993741454e-05, |
|
"loss": 0.3327, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.03452802449464798, |
|
"learning_rate": 4.197664583988376e-05, |
|
"loss": 0.3225, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.03389676287770271, |
|
"learning_rate": 4.168559755647169e-05, |
|
"loss": 0.3234, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.033218082040548325, |
|
"learning_rate": 4.1395295869783615e-05, |
|
"loss": 0.319, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.03429538384079933, |
|
"learning_rate": 4.110574449653916e-05, |
|
"loss": 0.3201, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.03393711894750595, |
|
"learning_rate": 4.0816947143851816e-05, |
|
"loss": 0.3249, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.03543519601225853, |
|
"learning_rate": 4.052890750918138e-05, |
|
"loss": 0.3082, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.03144745156168938, |
|
"learning_rate": 4.024162928028663e-05, |
|
"loss": 0.3232, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.03410906717181206, |
|
"learning_rate": 3.995511613517799e-05, |
|
"loss": 0.3176, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.03316747397184372, |
|
"learning_rate": 3.966937174207066e-05, |
|
"loss": 0.3325, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.032455362379550934, |
|
"learning_rate": 3.938439975933752e-05, |
|
"loss": 0.3175, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.03558775410056114, |
|
"learning_rate": 3.910020383546233e-05, |
|
"loss": 0.322, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.032109521329402924, |
|
"learning_rate": 3.881678760899291e-05, |
|
"loss": 0.3261, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.03333992138504982, |
|
"learning_rate": 3.853415470849479e-05, |
|
"loss": 0.3173, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.03266488015651703, |
|
"learning_rate": 3.8252308752504404e-05, |
|
"loss": 0.3252, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.03426700085401535, |
|
"learning_rate": 3.7971253349483285e-05, |
|
"loss": 0.319, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.03221927583217621, |
|
"learning_rate": 3.7690992097771224e-05, |
|
"loss": 0.3276, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.031010834500193596, |
|
"learning_rate": 3.741152858554077e-05, |
|
"loss": 0.3198, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.03406721353530884, |
|
"learning_rate": 3.713286639075083e-05, |
|
"loss": 0.3323, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.03283034637570381, |
|
"learning_rate": 3.6855009081101355e-05, |
|
"loss": 0.3177, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.03155268728733063, |
|
"learning_rate": 3.6577960213987104e-05, |
|
"loss": 0.3235, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.034217044711112976, |
|
"learning_rate": 3.630172333645261e-05, |
|
"loss": 0.3229, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.03232642635703087, |
|
"learning_rate": 3.602630198514632e-05, |
|
"loss": 0.3214, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.03322529047727585, |
|
"learning_rate": 3.5751699686275786e-05, |
|
"loss": 0.3269, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.03211558982729912, |
|
"learning_rate": 3.547791995556203e-05, |
|
"loss": 0.3173, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.03321954980492592, |
|
"learning_rate": 3.520496629819494e-05, |
|
"loss": 0.309, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.03272011876106262, |
|
"learning_rate": 3.493284220878817e-05, |
|
"loss": 0.3191, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.03565934672951698, |
|
"learning_rate": 3.466155117133433e-05, |
|
"loss": 0.3137, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.03242676332592964, |
|
"learning_rate": 3.439109665916076e-05, |
|
"loss": 0.3269, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.03284203261137009, |
|
"learning_rate": 3.4121482134884575e-05, |
|
"loss": 0.3254, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.035054489970207214, |
|
"learning_rate": 3.385271105036868e-05, |
|
"loss": 0.3263, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.03570910543203354, |
|
"learning_rate": 3.358478684667734e-05, |
|
"loss": 0.3322, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.030848857015371323, |
|
"learning_rate": 3.3317712954032454e-05, |
|
"loss": 0.321, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.03351164981722832, |
|
"learning_rate": 3.305149279176921e-05, |
|
"loss": 0.3192, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.03559879586100578, |
|
"learning_rate": 3.278612976829265e-05, |
|
"loss": 0.3202, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.030793210491538048, |
|
"learning_rate": 3.252162728103382e-05, |
|
"loss": 0.3151, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.032545629888772964, |
|
"learning_rate": 3.225798871640644e-05, |
|
"loss": 0.3214, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.03530002385377884, |
|
"learning_rate": 3.199521744976342e-05, |
|
"loss": 0.3224, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.0325043648481369, |
|
"learning_rate": 3.1733316845353735e-05, |
|
"loss": 0.3265, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.033760733902454376, |
|
"learning_rate": 3.147229025627922e-05, |
|
"loss": 0.3304, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.0321899875998497, |
|
"learning_rate": 3.1212141024451826e-05, |
|
"loss": 0.3176, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.03273585066199303, |
|
"learning_rate": 3.095287248055069e-05, |
|
"loss": 0.3286, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.03182963281869888, |
|
"learning_rate": 3.069448794397959e-05, |
|
"loss": 0.3201, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.03337734937667847, |
|
"learning_rate": 3.043699072282429e-05, |
|
"loss": 0.3269, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.03036559373140335, |
|
"learning_rate": 3.01803841138104e-05, |
|
"loss": 0.3203, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.03221311792731285, |
|
"learning_rate": 2.9924671402261018e-05, |
|
"loss": 0.3354, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.031059028580784798, |
|
"learning_rate": 2.966985586205475e-05, |
|
"loss": 0.3171, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.03237179294228554, |
|
"learning_rate": 2.941594075558366e-05, |
|
"loss": 0.3173, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.03317666053771973, |
|
"learning_rate": 2.9162929333711697e-05, |
|
"loss": 0.3063, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.034094829112291336, |
|
"learning_rate": 2.8910824835732952e-05, |
|
"loss": 0.3012, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.031877171248197556, |
|
"learning_rate": 2.8659630489330213e-05, |
|
"loss": 0.3268, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.03302241861820221, |
|
"learning_rate": 2.8409349510533578e-05, |
|
"loss": 0.3093, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.03291558101773262, |
|
"learning_rate": 2.8159985103679408e-05, |
|
"loss": 0.3179, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.031883399933576584, |
|
"learning_rate": 2.7911540461369222e-05, |
|
"loss": 0.3204, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.03172933682799339, |
|
"learning_rate": 2.7664018764428835e-05, |
|
"loss": 0.3192, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.03233201429247856, |
|
"learning_rate": 2.7417423181867585e-05, |
|
"loss": 0.3118, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.032947055995464325, |
|
"learning_rate": 2.717175687083785e-05, |
|
"loss": 0.318, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.03231576085090637, |
|
"learning_rate": 2.6927022976594607e-05, |
|
"loss": 0.3064, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.032052453607320786, |
|
"learning_rate": 2.668322463245513e-05, |
|
"loss": 0.3202, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.03498750552535057, |
|
"learning_rate": 2.6440364959758813e-05, |
|
"loss": 0.3167, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.033017512410879135, |
|
"learning_rate": 2.619844706782737e-05, |
|
"loss": 0.3166, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.033758796751499176, |
|
"learning_rate": 2.595747405392491e-05, |
|
"loss": 0.3112, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.03454572334885597, |
|
"learning_rate": 2.5717449003218342e-05, |
|
"loss": 0.317, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.03280545398592949, |
|
"learning_rate": 2.5478374988737753e-05, |
|
"loss": 0.3161, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.03240388259291649, |
|
"learning_rate": 2.524025507133726e-05, |
|
"loss": 0.3259, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.03514571860432625, |
|
"learning_rate": 2.5003092299655584e-05, |
|
"loss": 0.3247, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.03480707108974457, |
|
"learning_rate": 2.4766889710077357e-05, |
|
"loss": 0.323, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.03250759467482567, |
|
"learning_rate": 2.4531650326693822e-05, |
|
"loss": 0.313, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.03347860276699066, |
|
"learning_rate": 2.429737716126451e-05, |
|
"loss": 0.3262, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.035569798201322556, |
|
"learning_rate": 2.406407321317835e-05, |
|
"loss": 0.3147, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.03313907980918884, |
|
"learning_rate": 2.3831741469415593e-05, |
|
"loss": 0.3252, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.03403162583708763, |
|
"learning_rate": 2.3600384904509254e-05, |
|
"loss": 0.2971, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.03517422825098038, |
|
"learning_rate": 2.3370006480507244e-05, |
|
"loss": 0.3191, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.03399303928017616, |
|
"learning_rate": 2.31406091469344e-05, |
|
"loss": 0.3182, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.03280871361494064, |
|
"learning_rate": 2.2912195840754604e-05, |
|
"loss": 0.3165, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.03217533603310585, |
|
"learning_rate": 2.2684769486333445e-05, |
|
"loss": 0.3258, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.03353995084762573, |
|
"learning_rate": 2.2458332995400433e-05, |
|
"loss": 0.3231, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.03184211999177933, |
|
"learning_rate": 2.2232889267012038e-05, |
|
"loss": 0.3027, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.03190856799483299, |
|
"learning_rate": 2.2008441187514293e-05, |
|
"loss": 0.3238, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.03220997750759125, |
|
"learning_rate": 2.178499163050617e-05, |
|
"loss": 0.3197, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.03379112109541893, |
|
"learning_rate": 2.156254345680242e-05, |
|
"loss": 0.3115, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.031570915132761, |
|
"learning_rate": 2.1341099514397266e-05, |
|
"loss": 0.3131, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.032580941915512085, |
|
"learning_rate": 2.1120662638427647e-05, |
|
"loss": 0.3098, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.03251396119594574, |
|
"learning_rate": 2.0901235651137284e-05, |
|
"loss": 0.3124, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.03203850984573364, |
|
"learning_rate": 2.0682821361840154e-05, |
|
"loss": 0.3219, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.03141346946358681, |
|
"learning_rate": 2.0465422566884805e-05, |
|
"loss": 0.3144, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.031967923045158386, |
|
"learning_rate": 2.0249042049618384e-05, |
|
"loss": 0.3129, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.031204957515001297, |
|
"learning_rate": 2.0033682580351144e-05, |
|
"loss": 0.315, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.03257647901773453, |
|
"learning_rate": 1.981934691632088e-05, |
|
"loss": 0.3158, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.03063308075070381, |
|
"learning_rate": 1.9606037801657673e-05, |
|
"loss": 0.3272, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.0314013808965683, |
|
"learning_rate": 1.9393757967348658e-05, |
|
"loss": 0.324, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.03184258192777634, |
|
"learning_rate": 1.9182510131203224e-05, |
|
"loss": 0.322, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.03182119131088257, |
|
"learning_rate": 1.8972296997818107e-05, |
|
"loss": 0.3121, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.03157862275838852, |
|
"learning_rate": 1.8763121258542815e-05, |
|
"loss": 0.3191, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.03184880316257477, |
|
"learning_rate": 1.8554985591445072e-05, |
|
"loss": 0.3125, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.03168749809265137, |
|
"learning_rate": 1.8347892661276656e-05, |
|
"loss": 0.3211, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.031068336218595505, |
|
"learning_rate": 1.8141845119439248e-05, |
|
"loss": 0.3185, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.03259959816932678, |
|
"learning_rate": 1.7936845603950447e-05, |
|
"loss": 0.3132, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.032149478793144226, |
|
"learning_rate": 1.7732896739409964e-05, |
|
"loss": 0.3241, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.031526118516922, |
|
"learning_rate": 1.753000113696617e-05, |
|
"loss": 0.33, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.031119871884584427, |
|
"learning_rate": 1.7328161394282495e-05, |
|
"loss": 0.3254, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.03228578716516495, |
|
"learning_rate": 1.7127380095504296e-05, |
|
"loss": 0.3096, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.03137804940342903, |
|
"learning_rate": 1.6927659811225648e-05, |
|
"loss": 0.3236, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.031106434762477875, |
|
"learning_rate": 1.6729003098456576e-05, |
|
"loss": 0.3085, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.02995161898434162, |
|
"learning_rate": 1.6531412500590206e-05, |
|
"loss": 0.3155, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.031855881214141846, |
|
"learning_rate": 1.6334890547370286e-05, |
|
"loss": 0.3086, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.03299317881464958, |
|
"learning_rate": 1.6139439754858653e-05, |
|
"loss": 0.3116, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.03227045387029648, |
|
"learning_rate": 1.594506262540324e-05, |
|
"loss": 0.3234, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.030943624675273895, |
|
"learning_rate": 1.5751761647605844e-05, |
|
"loss": 0.3119, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.031604256480932236, |
|
"learning_rate": 1.5559539296290403e-05, |
|
"loss": 0.3125, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.030888285487890244, |
|
"learning_rate": 1.5368398032471155e-05, |
|
"loss": 0.3267, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.03147697448730469, |
|
"learning_rate": 1.5178340303321314e-05, |
|
"loss": 0.3224, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.03143538162112236, |
|
"learning_rate": 1.4989368542141546e-05, |
|
"loss": 0.3124, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.03131905943155289, |
|
"learning_rate": 1.4801485168329066e-05, |
|
"loss": 0.3163, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.031624022871255875, |
|
"learning_rate": 1.4614692587346368e-05, |
|
"loss": 0.3201, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.03109106421470642, |
|
"learning_rate": 1.4428993190690677e-05, |
|
"loss": 0.3141, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.03195478022098541, |
|
"learning_rate": 1.4244389355863097e-05, |
|
"loss": 0.3169, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.031268466264009476, |
|
"learning_rate": 1.4060883446338502e-05, |
|
"loss": 0.3136, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.031183408573269844, |
|
"learning_rate": 1.3878477811534884e-05, |
|
"loss": 0.3212, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.030434761196374893, |
|
"learning_rate": 1.3697174786783584e-05, |
|
"loss": 0.3247, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.03006039746105671, |
|
"learning_rate": 1.3516976693299189e-05, |
|
"loss": 0.3263, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.03140862286090851, |
|
"learning_rate": 1.3337885838149988e-05, |
|
"loss": 0.3116, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.031039943918585777, |
|
"learning_rate": 1.3159904514228317e-05, |
|
"loss": 0.3187, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.030673634260892868, |
|
"learning_rate": 1.2983035000221177e-05, |
|
"loss": 0.3092, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.030753571540117264, |
|
"learning_rate": 1.2807279560581208e-05, |
|
"loss": 0.3031, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.030619826167821884, |
|
"learning_rate": 1.263264044549748e-05, |
|
"loss": 0.3205, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.0303605068475008, |
|
"learning_rate": 1.2459119890866988e-05, |
|
"loss": 0.3211, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.030971838161349297, |
|
"learning_rate": 1.2286720118265659e-05, |
|
"loss": 0.3172, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.031022140756249428, |
|
"learning_rate": 1.211544333492023e-05, |
|
"loss": 0.3144, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.030957097187638283, |
|
"learning_rate": 1.1945291733679764e-05, |
|
"loss": 0.3144, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.029829049482941628, |
|
"learning_rate": 1.1776267492987769e-05, |
|
"loss": 0.319, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.02998540922999382, |
|
"learning_rate": 1.1608372776854103e-05, |
|
"loss": 0.3226, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.031895529478788376, |
|
"learning_rate": 1.1441609734827452e-05, |
|
"loss": 0.3216, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.03028094209730625, |
|
"learning_rate": 1.1275980501967642e-05, |
|
"loss": 0.3139, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.03133474662899971, |
|
"learning_rate": 1.1111487198818482e-05, |
|
"loss": 0.3082, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.03046741895377636, |
|
"learning_rate": 1.0948131931380457e-05, |
|
"loss": 0.3191, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.030290376394987106, |
|
"learning_rate": 1.0785916791083893e-05, |
|
"loss": 0.3223, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.03159311041235924, |
|
"learning_rate": 1.0624843854762034e-05, |
|
"loss": 0.3154, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.03061581589281559, |
|
"learning_rate": 1.0464915184624602e-05, |
|
"loss": 0.3212, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.030794519931077957, |
|
"learning_rate": 1.0306132828231318e-05, |
|
"loss": 0.3189, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.031181281432509422, |
|
"learning_rate": 1.0148498818465701e-05, |
|
"loss": 0.3255, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.03161652758717537, |
|
"learning_rate": 9.992015173508995e-06, |
|
"loss": 0.3246, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.03009726293385029, |
|
"learning_rate": 9.836683896814424e-06, |
|
"loss": 0.3121, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.029831791296601295, |
|
"learning_rate": 9.682506977081496e-06, |
|
"loss": 0.3159, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.030981022864580154, |
|
"learning_rate": 9.529486388230513e-06, |
|
"loss": 0.313, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.030378982424736023, |
|
"learning_rate": 9.37762408937729e-06, |
|
"loss": 0.304, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.03098244220018387, |
|
"learning_rate": 9.226922024808194e-06, |
|
"loss": 0.3254, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.030623434111475945, |
|
"learning_rate": 9.07738212395508e-06, |
|
"loss": 0.3189, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.03084915690124035, |
|
"learning_rate": 8.929006301370747e-06, |
|
"loss": 0.3198, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.030014045536518097, |
|
"learning_rate": 8.781796456704262e-06, |
|
"loss": 0.3148, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.030298901721835136, |
|
"learning_rate": 8.635754474676805e-06, |
|
"loss": 0.3392, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.0325307697057724, |
|
"learning_rate": 8.490882225057428e-06, |
|
"loss": 0.3108, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.032279226928949356, |
|
"learning_rate": 8.347181562639184e-06, |
|
"loss": 0.3255, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.031304631382226944, |
|
"learning_rate": 8.204654327215267e-06, |
|
"loss": 0.3235, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.02953815460205078, |
|
"learning_rate": 8.063302343555623e-06, |
|
"loss": 0.338, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.03114440105855465, |
|
"learning_rate": 7.923127421383458e-06, |
|
"loss": 0.312, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.030303098261356354, |
|
"learning_rate": 7.784131355352143e-06, |
|
"loss": 0.3307, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.030576419085264206, |
|
"learning_rate": 7.646315925022152e-06, |
|
"loss": 0.3181, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.030262455344200134, |
|
"learning_rate": 7.5096828948383765e-06, |
|
"loss": 0.3099, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.0308231133967638, |
|
"learning_rate": 7.374234014107484e-06, |
|
"loss": 0.3226, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.029936641454696655, |
|
"learning_rate": 7.239971016975544e-06, |
|
"loss": 0.3147, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.03037368506193161, |
|
"learning_rate": 7.106895622405752e-06, |
|
"loss": 0.3129, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.030741466209292412, |
|
"learning_rate": 6.97500953415654e-06, |
|
"loss": 0.3086, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.030671723186969757, |
|
"learning_rate": 6.844314440759647e-06, |
|
"loss": 0.313, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.030171960592269897, |
|
"learning_rate": 6.7148120154986616e-06, |
|
"loss": 0.3124, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.030345430597662926, |
|
"learning_rate": 6.586503916387366e-06, |
|
"loss": 0.3117, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.030036896467208862, |
|
"learning_rate": 6.459391786148738e-06, |
|
"loss": 0.3229, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.03146936371922493, |
|
"learning_rate": 6.333477252193731e-06, |
|
"loss": 0.3112, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.0303319301456213, |
|
"learning_rate": 6.208761926600626e-06, |
|
"loss": 0.3145, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.0306432843208313, |
|
"learning_rate": 6.085247406094197e-06, |
|
"loss": 0.3374, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.030408993363380432, |
|
"learning_rate": 5.962935272025416e-06, |
|
"loss": 0.3128, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.030169207602739334, |
|
"learning_rate": 5.841827090351171e-06, |
|
"loss": 0.3193, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.031130915507674217, |
|
"learning_rate": 5.721924411614122e-06, |
|
"loss": 0.3191, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.03005225583910942, |
|
"learning_rate": 5.603228770923041e-06, |
|
"loss": 0.3177, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.02937263622879982, |
|
"learning_rate": 5.485741687932932e-06, |
|
"loss": 0.3143, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.02967953123152256, |
|
"learning_rate": 5.3694646668257855e-06, |
|
"loss": 0.3166, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.030509594827890396, |
|
"learning_rate": 5.254399196291115e-06, |
|
"loss": 0.3125, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.030052825808525085, |
|
"learning_rate": 5.140546749507136e-06, |
|
"loss": 0.3157, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.030334623530507088, |
|
"learning_rate": 5.027908784121649e-06, |
|
"loss": 0.3224, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.029934940859675407, |
|
"learning_rate": 4.916486742233606e-06, |
|
"loss": 0.3192, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.03001435659825802, |
|
"learning_rate": 4.806282050374455e-06, |
|
"loss": 0.321, |
|
"step": 801 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 888, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 89, |
|
"total_flos": 1.5977855423663309e+19, |
|
"train_batch_size": 14, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|