|
{
  "best_metric": 11.915698051452637,
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
  "epoch": 0.0828521865209849,
  "eval_steps": 25,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0004142609326049245,
      "grad_norm": 0.009111136198043823,
      "learning_rate": 2.9999999999999997e-05,
      "loss": 11.9291,
      "step": 1
    },
    {
      "epoch": 0.0004142609326049245,
      "eval_loss": 11.93281364440918,
      "eval_runtime": 0.2456,
      "eval_samples_per_second": 203.617,
      "eval_steps_per_second": 28.506,
      "step": 1
    },
    {
      "epoch": 0.000828521865209849,
      "grad_norm": 0.01578676700592041,
      "learning_rate": 5.9999999999999995e-05,
      "loss": 11.9311,
      "step": 2
    },
    {
      "epoch": 0.0012427827978147737,
      "grad_norm": 0.010788201354444027,
      "learning_rate": 8.999999999999999e-05,
      "loss": 11.9303,
      "step": 3
    },
    {
      "epoch": 0.001657043730419698,
      "grad_norm": 0.014519501477479935,
      "learning_rate": 0.00011999999999999999,
      "loss": 11.9288,
      "step": 4
    },
    {
      "epoch": 0.0020713046630246226,
      "grad_norm": 0.01510781329125166,
      "learning_rate": 0.00015,
      "loss": 11.9304,
      "step": 5
    },
    {
      "epoch": 0.0024855655956295473,
      "grad_norm": 0.013269297778606415,
      "learning_rate": 0.00017999999999999998,
      "loss": 11.9294,
      "step": 6
    },
    {
      "epoch": 0.0028998265282344717,
      "grad_norm": 0.013751151971518993,
      "learning_rate": 0.00020999999999999998,
      "loss": 11.9321,
      "step": 7
    },
    {
      "epoch": 0.003314087460839396,
      "grad_norm": 0.015573951415717602,
      "learning_rate": 0.00023999999999999998,
      "loss": 11.9287,
      "step": 8
    },
    {
      "epoch": 0.0037283483934443208,
      "grad_norm": 0.009504775516688824,
      "learning_rate": 0.00027,
      "loss": 11.9305,
      "step": 9
    },
    {
      "epoch": 0.004142609326049245,
      "grad_norm": 0.010801173746585846,
      "learning_rate": 0.0003,
      "loss": 11.9337,
      "step": 10
    },
    {
      "epoch": 0.0045568702586541695,
      "grad_norm": 0.011178042739629745,
      "learning_rate": 0.0002999794957488703,
      "loss": 11.9278,
      "step": 11
    },
    {
      "epoch": 0.004971131191259095,
      "grad_norm": 0.014659388922154903,
      "learning_rate": 0.0002999179886011389,
      "loss": 11.9297,
      "step": 12
    },
    {
      "epoch": 0.005385392123864019,
      "grad_norm": 0.013904299587011337,
      "learning_rate": 0.0002998154953722457,
      "loss": 11.9315,
      "step": 13
    },
    {
      "epoch": 0.005799653056468943,
      "grad_norm": 0.014901366084814072,
      "learning_rate": 0.00029967204408281613,
      "loss": 11.9306,
      "step": 14
    },
    {
      "epoch": 0.006213913989073868,
      "grad_norm": 0.014373673126101494,
      "learning_rate": 0.00029948767395100045,
      "loss": 11.9291,
      "step": 15
    },
    {
      "epoch": 0.006628174921678792,
      "grad_norm": 0.012232257053256035,
      "learning_rate": 0.0002992624353817517,
      "loss": 11.9292,
      "step": 16
    },
    {
      "epoch": 0.007042435854283717,
      "grad_norm": 0.011413573287427425,
      "learning_rate": 0.0002989963899530457,
      "loss": 11.9322,
      "step": 17
    },
    {
      "epoch": 0.0074566967868886416,
      "grad_norm": 0.016656706109642982,
      "learning_rate": 0.00029868961039904624,
      "loss": 11.9324,
      "step": 18
    },
    {
      "epoch": 0.007870957719493567,
      "grad_norm": 0.014373266138136387,
      "learning_rate": 0.00029834218059022024,
      "loss": 11.9318,
      "step": 19
    },
    {
      "epoch": 0.00828521865209849,
      "grad_norm": 0.015115369111299515,
      "learning_rate": 0.00029795419551040833,
      "loss": 11.9287,
      "step": 20
    },
    {
      "epoch": 0.008699479584703415,
      "grad_norm": 0.01734703592956066,
      "learning_rate": 0.00029752576123085736,
      "loss": 11.9309,
      "step": 21
    },
    {
      "epoch": 0.009113740517308339,
      "grad_norm": 0.014644989743828773,
      "learning_rate": 0.0002970569948812214,
      "loss": 11.9254,
      "step": 22
    },
    {
      "epoch": 0.009528001449913264,
      "grad_norm": 0.021497027948498726,
      "learning_rate": 0.0002965480246175399,
      "loss": 11.9302,
      "step": 23
    },
    {
      "epoch": 0.00994226238251819,
      "grad_norm": 0.01817285642027855,
      "learning_rate": 0.0002959989895872009,
      "loss": 11.9312,
      "step": 24
    },
    {
      "epoch": 0.010356523315123113,
      "grad_norm": 0.02207845263183117,
      "learning_rate": 0.0002954100398908995,
      "loss": 11.9284,
      "step": 25
    },
    {
      "epoch": 0.010356523315123113,
      "eval_loss": 11.93165397644043,
      "eval_runtime": 0.2441,
      "eval_samples_per_second": 204.847,
      "eval_steps_per_second": 28.679,
      "step": 25
    },
    {
      "epoch": 0.010770784247728038,
      "grad_norm": 0.017103340476751328,
      "learning_rate": 0.0002947813365416023,
      "loss": 11.9297,
      "step": 26
    },
    {
      "epoch": 0.011185045180332961,
      "grad_norm": 0.0265261959284544,
      "learning_rate": 0.0002941130514205272,
      "loss": 11.9303,
      "step": 27
    },
    {
      "epoch": 0.011599306112937887,
      "grad_norm": 0.031121160835027695,
      "learning_rate": 0.0002934053672301536,
      "loss": 11.9264,
      "step": 28
    },
    {
      "epoch": 0.012013567045542812,
      "grad_norm": 0.020733291283249855,
      "learning_rate": 0.00029265847744427303,
      "loss": 11.9311,
      "step": 29
    },
    {
      "epoch": 0.012427827978147735,
      "grad_norm": 0.019345130771398544,
      "learning_rate": 0.00029187258625509513,
      "loss": 11.9295,
      "step": 30
    },
    {
      "epoch": 0.01284208891075266,
      "grad_norm": 0.025070849806070328,
      "learning_rate": 0.00029104790851742417,
      "loss": 11.9307,
      "step": 31
    },
    {
      "epoch": 0.013256349843357584,
      "grad_norm": 0.02109096758067608,
      "learning_rate": 0.0002901846696899191,
      "loss": 11.927,
      "step": 32
    },
    {
      "epoch": 0.01367061077596251,
      "grad_norm": 0.027676530182361603,
      "learning_rate": 0.00028928310577345606,
      "loss": 11.9296,
      "step": 33
    },
    {
      "epoch": 0.014084871708567434,
      "grad_norm": 0.02774973399937153,
      "learning_rate": 0.0002883434632466077,
      "loss": 11.9294,
      "step": 34
    },
    {
      "epoch": 0.014499132641172358,
      "grad_norm": 0.026240255683660507,
      "learning_rate": 0.00028736599899825856,
      "loss": 11.9283,
      "step": 35
    },
    {
      "epoch": 0.014913393573777283,
      "grad_norm": 0.03300342708826065,
      "learning_rate": 0.00028635098025737434,
      "loss": 11.9277,
      "step": 36
    },
    {
      "epoch": 0.015327654506382208,
      "grad_norm": 0.03084516152739525,
      "learning_rate": 0.00028529868451994384,
      "loss": 11.9284,
      "step": 37
    },
    {
      "epoch": 0.015741915438987134,
      "grad_norm": 0.03772755712270737,
      "learning_rate": 0.0002842093994731145,
      "loss": 11.9301,
      "step": 38
    },
    {
      "epoch": 0.016156176371592057,
      "grad_norm": 0.044647328555583954,
      "learning_rate": 0.00028308342291654174,
      "loss": 11.929,
      "step": 39
    },
    {
      "epoch": 0.01657043730419698,
      "grad_norm": 0.0458788201212883,
      "learning_rate": 0.00028192106268097334,
      "loss": 11.9261,
      "step": 40
    },
    {
      "epoch": 0.016984698236801904,
      "grad_norm": 0.062342319637537,
      "learning_rate": 0.00028072263654409154,
      "loss": 11.9292,
      "step": 41
    },
    {
      "epoch": 0.01739895916940683,
      "grad_norm": 0.04045253247022629,
      "learning_rate": 0.0002794884721436361,
      "loss": 11.9276,
      "step": 42
    },
    {
      "epoch": 0.017813220102011754,
      "grad_norm": 0.05095702409744263,
      "learning_rate": 0.00027821890688783083,
      "loss": 11.9244,
      "step": 43
    },
    {
      "epoch": 0.018227481034616678,
      "grad_norm": 0.04911315068602562,
      "learning_rate": 0.0002769142878631403,
      "loss": 11.9264,
      "step": 44
    },
    {
      "epoch": 0.018641741967221605,
      "grad_norm": 0.04607488214969635,
      "learning_rate": 0.00027557497173937923,
      "loss": 11.9244,
      "step": 45
    },
    {
      "epoch": 0.019056002899826528,
      "grad_norm": 0.05658293142914772,
      "learning_rate": 0.000274201324672203,
      "loss": 11.9224,
      "step": 46
    },
    {
      "epoch": 0.01947026383243145,
      "grad_norm": 0.050977252423763275,
      "learning_rate": 0.00027279372220300385,
      "loss": 11.9255,
      "step": 47
    },
    {
      "epoch": 0.01988452476503638,
      "grad_norm": 0.06175487861037254,
      "learning_rate": 0.0002713525491562421,
      "loss": 11.9218,
      "step": 48
    },
    {
      "epoch": 0.020298785697641302,
      "grad_norm": 0.049341753125190735,
      "learning_rate": 0.00026987819953423867,
      "loss": 11.927,
      "step": 49
    },
    {
      "epoch": 0.020713046630246226,
      "grad_norm": 0.0748741626739502,
      "learning_rate": 0.00026837107640945905,
      "loss": 11.9258,
      "step": 50
    },
    {
      "epoch": 0.020713046630246226,
      "eval_loss": 11.926169395446777,
      "eval_runtime": 0.2445,
      "eval_samples_per_second": 204.526,
      "eval_steps_per_second": 28.634,
      "step": 50
    },
    {
      "epoch": 0.021127307562851153,
      "grad_norm": 0.06076189503073692,
      "learning_rate": 0.0002668315918143169,
      "loss": 11.9242,
      "step": 51
    },
    {
      "epoch": 0.021541568495456076,
      "grad_norm": 0.0622175894677639,
      "learning_rate": 0.00026526016662852886,
      "loss": 11.9228,
      "step": 52
    },
    {
      "epoch": 0.021955829428061,
      "grad_norm": 0.05812002345919609,
      "learning_rate": 0.00026365723046405023,
      "loss": 11.9241,
      "step": 53
    },
    {
      "epoch": 0.022370090360665923,
      "grad_norm": 0.05627156049013138,
      "learning_rate": 0.0002620232215476231,
      "loss": 11.9206,
      "step": 54
    },
    {
      "epoch": 0.02278435129327085,
      "grad_norm": 0.04959167540073395,
      "learning_rate": 0.0002603585866009697,
      "loss": 11.9216,
      "step": 55
    },
    {
      "epoch": 0.023198612225875773,
      "grad_norm": 0.05402550846338272,
      "learning_rate": 0.00025866378071866334,
      "loss": 11.9216,
      "step": 56
    },
    {
      "epoch": 0.023612873158480697,
      "grad_norm": 0.049663037061691284,
      "learning_rate": 0.00025693926724370956,
      "loss": 11.9207,
      "step": 57
    },
    {
      "epoch": 0.024027134091085624,
      "grad_norm": 0.045585885643959045,
      "learning_rate": 0.00025518551764087326,
      "loss": 11.9232,
      "step": 58
    },
    {
      "epoch": 0.024441395023690547,
      "grad_norm": 0.05635461211204529,
      "learning_rate": 0.00025340301136778483,
      "loss": 11.922,
      "step": 59
    },
    {
      "epoch": 0.02485565595629547,
      "grad_norm": 0.05952185392379761,
      "learning_rate": 0.00025159223574386114,
      "loss": 11.9209,
      "step": 60
    },
    {
      "epoch": 0.025269916888900398,
      "grad_norm": 0.04048493504524231,
      "learning_rate": 0.0002497536858170772,
      "loss": 11.9213,
      "step": 61
    },
    {
      "epoch": 0.02568417782150532,
      "grad_norm": 0.050521329045295715,
      "learning_rate": 0.00024788786422862526,
      "loss": 11.9192,
      "step": 62
    },
    {
      "epoch": 0.026098438754110245,
      "grad_norm": 0.043353911489248276,
      "learning_rate": 0.00024599528107549745,
      "loss": 11.9202,
      "step": 63
    },
    {
      "epoch": 0.026512699686715168,
      "grad_norm": 0.0434187576174736,
      "learning_rate": 0.00024407645377103054,
      "loss": 11.9171,
      "step": 64
    },
    {
      "epoch": 0.026926960619320095,
      "grad_norm": 0.0419292189180851,
      "learning_rate": 0.00024213190690345018,
      "loss": 11.9196,
      "step": 65
    },
    {
      "epoch": 0.02734122155192502,
      "grad_norm": 0.04601728916168213,
      "learning_rate": 0.00024016217209245374,
      "loss": 11.9174,
      "step": 66
    },
    {
      "epoch": 0.027755482484529942,
      "grad_norm": 0.047061558812856674,
      "learning_rate": 0.00023816778784387094,
      "loss": 11.9189,
      "step": 67
    },
    {
      "epoch": 0.02816974341713487,
      "grad_norm": 0.039436087012290955,
      "learning_rate": 0.0002361492994024415,
      "loss": 11.9178,
      "step": 68
    },
    {
      "epoch": 0.028584004349739792,
      "grad_norm": 0.03027687780559063,
      "learning_rate": 0.0002341072586027509,
      "loss": 11.9175,
      "step": 69
    },
    {
      "epoch": 0.028998265282344716,
      "grad_norm": 0.03565209358930588,
      "learning_rate": 0.00023204222371836405,
      "loss": 11.9168,
      "step": 70
    },
    {
      "epoch": 0.029412526214949643,
      "grad_norm": 0.02530493400990963,
      "learning_rate": 0.00022995475930919905,
      "loss": 11.9163,
      "step": 71
    },
    {
      "epoch": 0.029826787147554566,
      "grad_norm": 0.0354970321059227,
      "learning_rate": 0.00022784543606718227,
      "loss": 11.9154,
      "step": 72
    },
    {
      "epoch": 0.03024104808015949,
      "grad_norm": 0.018435046076774597,
      "learning_rate": 0.00022571483066022657,
      "loss": 11.9158,
      "step": 73
    },
    {
      "epoch": 0.030655309012764417,
      "grad_norm": 0.02600877545773983,
      "learning_rate": 0.0002235635255745762,
      "loss": 11.9121,
      "step": 74
    },
    {
      "epoch": 0.03106956994536934,
      "grad_norm": 0.032639019191265106,
      "learning_rate": 0.00022139210895556104,
      "loss": 11.9143,
      "step": 75
    },
    {
      "epoch": 0.03106956994536934,
      "eval_loss": 11.916912078857422,
      "eval_runtime": 0.2447,
      "eval_samples_per_second": 204.352,
      "eval_steps_per_second": 28.609,
      "step": 75
    },
    {
      "epoch": 0.03148383087797427,
      "grad_norm": 0.026764797046780586,
      "learning_rate": 0.00021920117444680317,
      "loss": 11.9196,
      "step": 76
    },
    {
      "epoch": 0.03189809181057919,
      "grad_norm": 0.0315483957529068,
      "learning_rate": 0.00021699132102792097,
      "loss": 11.9159,
      "step": 77
    },
    {
      "epoch": 0.032312352743184114,
      "grad_norm": 0.029291000217199326,
      "learning_rate": 0.0002147631528507739,
      "loss": 11.9156,
      "step": 78
    },
    {
      "epoch": 0.03272661367578904,
      "grad_norm": 0.02186393365263939,
      "learning_rate": 0.00021251727907429355,
      "loss": 11.9126,
      "step": 79
    },
    {
      "epoch": 0.03314087460839396,
      "grad_norm": 0.02193310484290123,
      "learning_rate": 0.0002102543136979454,
      "loss": 11.9144,
      "step": 80
    },
    {
      "epoch": 0.033555135540998884,
      "grad_norm": 0.027057521045207977,
      "learning_rate": 0.0002079748753938678,
      "loss": 11.9136,
      "step": 81
    },
    {
      "epoch": 0.03396939647360381,
      "grad_norm": 0.028718745335936546,
      "learning_rate": 0.0002056795873377331,
      "loss": 11.9155,
      "step": 82
    },
    {
      "epoch": 0.03438365740620874,
      "grad_norm": 0.03541611507534981,
      "learning_rate": 0.00020336907703837748,
      "loss": 11.9161,
      "step": 83
    },
    {
      "epoch": 0.03479791833881366,
      "grad_norm": 0.027952907606959343,
      "learning_rate": 0.00020104397616624645,
      "loss": 11.9165,
      "step": 84
    },
    {
      "epoch": 0.035212179271418585,
      "grad_norm": 0.02944616600871086,
      "learning_rate": 0.00019870492038070252,
      "loss": 11.9162,
      "step": 85
    },
    {
      "epoch": 0.03562644020402351,
      "grad_norm": 0.028091363608837128,
      "learning_rate": 0.0001963525491562421,
      "loss": 11.9145,
      "step": 86
    },
    {
      "epoch": 0.03604070113662843,
      "grad_norm": 0.01949743553996086,
      "learning_rate": 0.0001939875056076697,
      "loss": 11.9126,
      "step": 87
    },
    {
      "epoch": 0.036454962069233356,
      "grad_norm": 0.03287213295698166,
      "learning_rate": 0.00019161043631427666,
      "loss": 11.9162,
      "step": 88
    },
    {
      "epoch": 0.036869223001838286,
      "grad_norm": 0.024355346336960793,
      "learning_rate": 0.00018922199114307294,
      "loss": 11.9142,
      "step": 89
    },
    {
      "epoch": 0.03728348393444321,
      "grad_norm": 0.02432161383330822,
      "learning_rate": 0.00018682282307111987,
      "loss": 11.9123,
      "step": 90
    },
    {
      "epoch": 0.03769774486704813,
      "grad_norm": 0.02186986431479454,
      "learning_rate": 0.00018441358800701273,
      "loss": 11.9155,
      "step": 91
    },
    {
      "epoch": 0.038112005799653056,
      "grad_norm": 0.02595347724854946,
      "learning_rate": 0.00018199494461156203,
      "loss": 11.9145,
      "step": 92
    },
    {
      "epoch": 0.03852626673225798,
      "grad_norm": 0.01945437304675579,
      "learning_rate": 0.000179567554117722,
      "loss": 11.9142,
      "step": 93
    },
    {
      "epoch": 0.0389405276648629,
      "grad_norm": 0.034285496920347214,
      "learning_rate": 0.00017713208014981648,
      "loss": 11.9142,
      "step": 94
    },
    {
      "epoch": 0.03935478859746783,
      "grad_norm": 0.019000215455889702,
      "learning_rate": 0.00017468918854211007,
      "loss": 11.9144,
      "step": 95
    },
    {
      "epoch": 0.03976904953007276,
      "grad_norm": 0.027748456224799156,
      "learning_rate": 0.00017223954715677627,
      "loss": 11.9086,
      "step": 96
    },
    {
      "epoch": 0.04018331046267768,
      "grad_norm": 0.02137632481753826,
      "learning_rate": 0.00016978382570131034,
      "loss": 11.9144,
      "step": 97
    },
    {
      "epoch": 0.040597571395282604,
      "grad_norm": 0.02740846946835518,
      "learning_rate": 0.00016732269554543794,
      "loss": 11.9143,
      "step": 98
    },
    {
      "epoch": 0.04101183232788753,
      "grad_norm": 0.02057352475821972,
      "learning_rate": 0.00016485682953756942,
      "loss": 11.912,
      "step": 99
    },
    {
      "epoch": 0.04142609326049245,
      "grad_norm": 0.036128297448158264,
      "learning_rate": 0.00016238690182084986,
      "loss": 11.9097,
      "step": 100
    },
    {
      "epoch": 0.04142609326049245,
      "eval_loss": 11.915943145751953,
      "eval_runtime": 0.2452,
      "eval_samples_per_second": 203.942,
      "eval_steps_per_second": 28.552,
      "step": 100
    },
    {
      "epoch": 0.041840354193097375,
      "grad_norm": 0.014668027870357037,
      "learning_rate": 0.0001599135876488549,
      "loss": 11.9176,
      "step": 101
    },
    {
      "epoch": 0.042254615125702305,
      "grad_norm": 0.013536687009036541,
      "learning_rate": 0.00015743756320098332,
      "loss": 11.9168,
      "step": 102
    },
    {
      "epoch": 0.04266887605830723,
      "grad_norm": 0.02093014121055603,
      "learning_rate": 0.0001549595053975962,
      "loss": 11.9183,
      "step": 103
    },
    {
      "epoch": 0.04308313699091215,
      "grad_norm": 0.0162394717335701,
      "learning_rate": 0.00015248009171495378,
      "loss": 11.9166,
      "step": 104
    },
    {
      "epoch": 0.043497397923517075,
      "grad_norm": 0.014748331159353256,
      "learning_rate": 0.00015,
      "loss": 11.9153,
      "step": 105
    },
    {
      "epoch": 0.043911658856122,
      "grad_norm": 0.014012960717082024,
      "learning_rate": 0.00014751990828504622,
      "loss": 11.9164,
      "step": 106
    },
    {
      "epoch": 0.04432591978872692,
      "grad_norm": 0.013328991830348969,
      "learning_rate": 0.00014504049460240375,
      "loss": 11.9175,
      "step": 107
    },
    {
      "epoch": 0.044740180721331846,
      "grad_norm": 0.016932377591729164,
      "learning_rate": 0.00014256243679901663,
      "loss": 11.9142,
      "step": 108
    },
    {
      "epoch": 0.045154441653936776,
      "grad_norm": 0.011404227465391159,
      "learning_rate": 0.00014008641235114508,
      "loss": 11.9137,
      "step": 109
    },
    {
      "epoch": 0.0455687025865417,
      "grad_norm": 0.018134044483304024,
      "learning_rate": 0.00013761309817915014,
      "loss": 11.9148,
      "step": 110
    },
    {
      "epoch": 0.04598296351914662,
      "grad_norm": 0.015835769474506378,
      "learning_rate": 0.00013514317046243058,
      "loss": 11.9172,
      "step": 111
    },
    {
      "epoch": 0.04639722445175155,
      "grad_norm": 0.016173770651221275,
      "learning_rate": 0.00013267730445456208,
      "loss": 11.9142,
      "step": 112
    },
    {
      "epoch": 0.04681148538435647,
      "grad_norm": 0.015759384259581566,
      "learning_rate": 0.00013021617429868963,
      "loss": 11.9165,
      "step": 113
    },
    {
      "epoch": 0.047225746316961394,
      "grad_norm": 0.013094507157802582,
      "learning_rate": 0.00012776045284322368,
      "loss": 11.9172,
      "step": 114
    },
    {
      "epoch": 0.047640007249566324,
      "grad_norm": 0.018885008990764618,
      "learning_rate": 0.00012531081145788987,
      "loss": 11.9162,
      "step": 115
    },
    {
      "epoch": 0.04805426818217125,
      "grad_norm": 0.011450850404798985,
      "learning_rate": 0.00012286791985018355,
      "loss": 11.9158,
      "step": 116
    },
    {
      "epoch": 0.04846852911477617,
      "grad_norm": 0.01042882353067398,
      "learning_rate": 0.00012043244588227796,
      "loss": 11.9134,
      "step": 117
    },
    {
      "epoch": 0.048882790047381094,
      "grad_norm": 0.01977408304810524,
      "learning_rate": 0.00011800505538843798,
      "loss": 11.9159,
      "step": 118
    },
    {
      "epoch": 0.04929705097998602,
      "grad_norm": 0.016834771260619164,
      "learning_rate": 0.00011558641199298727,
      "loss": 11.9161,
      "step": 119
    },
    {
      "epoch": 0.04971131191259094,
      "grad_norm": 0.015108548104763031,
      "learning_rate": 0.00011317717692888012,
      "loss": 11.9136,
      "step": 120
    },
    {
      "epoch": 0.050125572845195865,
      "grad_norm": 0.011461148038506508,
      "learning_rate": 0.00011077800885692702,
      "loss": 11.9161,
      "step": 121
    },
    {
      "epoch": 0.050539833777800795,
      "grad_norm": 0.01484390813857317,
      "learning_rate": 0.00010838956368572334,
      "loss": 11.9174,
      "step": 122
    },
    {
      "epoch": 0.05095409471040572,
      "grad_norm": 0.013123923912644386,
      "learning_rate": 0.0001060124943923303,
      "loss": 11.9137,
      "step": 123
    },
    {
      "epoch": 0.05136835564301064,
      "grad_norm": 0.016649112105369568,
      "learning_rate": 0.0001036474508437579,
      "loss": 11.9142,
      "step": 124
    },
    {
      "epoch": 0.051782616575615566,
      "grad_norm": 0.016424622386693954,
      "learning_rate": 0.00010129507961929748,
      "loss": 11.9117,
      "step": 125
    },
    {
      "epoch": 0.051782616575615566,
      "eval_loss": 11.915715217590332,
      "eval_runtime": 0.2449,
      "eval_samples_per_second": 204.149,
      "eval_steps_per_second": 28.581,
      "step": 125
    },
    {
      "epoch": 0.05219687750822049,
      "grad_norm": 0.011177320964634418,
      "learning_rate": 9.895602383375353e-05,
      "loss": 11.9142,
      "step": 126
    },
    {
      "epoch": 0.05261113844082541,
      "grad_norm": 0.014857947826385498,
      "learning_rate": 9.663092296162251e-05,
      "loss": 11.9146,
      "step": 127
    },
    {
      "epoch": 0.053025399373430336,
      "grad_norm": 0.032292690128088,
      "learning_rate": 9.432041266226686e-05,
      "loss": 11.9153,
      "step": 128
    },
    {
      "epoch": 0.053439660306035267,
      "grad_norm": 0.018420234322547913,
      "learning_rate": 9.202512460613219e-05,
      "loss": 11.9139,
      "step": 129
    },
    {
      "epoch": 0.05385392123864019,
      "grad_norm": 0.01115041971206665,
      "learning_rate": 8.97456863020546e-05,
      "loss": 11.9147,
      "step": 130
    },
    {
      "epoch": 0.05426818217124511,
      "grad_norm": 0.01565595157444477,
      "learning_rate": 8.748272092570646e-05,
      "loss": 11.9156,
      "step": 131
    },
    {
      "epoch": 0.05468244310385004,
      "grad_norm": 0.015339067205786705,
      "learning_rate": 8.523684714922608e-05,
      "loss": 11.9149,
      "step": 132
    },
    {
      "epoch": 0.05509670403645496,
      "grad_norm": 0.013736837543547153,
      "learning_rate": 8.300867897207903e-05,
      "loss": 11.9145,
      "step": 133
    },
    {
      "epoch": 0.055510964969059884,
      "grad_norm": 0.016272036358714104,
      "learning_rate": 8.079882555319684e-05,
      "loss": 11.9136,
      "step": 134
    },
    {
      "epoch": 0.055925225901664814,
      "grad_norm": 0.018742065876722336,
      "learning_rate": 7.860789104443896e-05,
      "loss": 11.9157,
      "step": 135
    },
    {
      "epoch": 0.05633948683426974,
      "grad_norm": 0.01904388517141342,
      "learning_rate": 7.643647442542382e-05,
      "loss": 11.9119,
      "step": 136
    },
    {
      "epoch": 0.05675374776687466,
      "grad_norm": 0.017035504803061485,
      "learning_rate": 7.428516933977347e-05,
      "loss": 11.911,
      "step": 137
    },
    {
      "epoch": 0.057168008699479585,
      "grad_norm": 0.016319135203957558,
      "learning_rate": 7.215456393281776e-05,
      "loss": 11.9136,
      "step": 138
    },
    {
      "epoch": 0.05758226963208451,
      "grad_norm": 0.01736661046743393,
      "learning_rate": 7.004524069080096e-05,
      "loss": 11.915,
      "step": 139
    },
    {
      "epoch": 0.05799653056468943,
      "grad_norm": 0.019759438931941986,
      "learning_rate": 6.795777628163599e-05,
      "loss": 11.9149,
      "step": 140
    },
    {
      "epoch": 0.058410791497294355,
      "grad_norm": 0.022349173203110695,
      "learning_rate": 6.58927413972491e-05,
      "loss": 11.9128,
      "step": 141
    },
    {
      "epoch": 0.058825052429899286,
      "grad_norm": 0.018443025648593903,
      "learning_rate": 6.385070059755846e-05,
      "loss": 11.9121,
      "step": 142
    },
    {
      "epoch": 0.05923931336250421,
      "grad_norm": 0.017736097797751427,
      "learning_rate": 6.183221215612904e-05,
      "loss": 11.9142,
      "step": 143
    },
    {
      "epoch": 0.05965357429510913,
      "grad_norm": 0.01825244352221489,
      "learning_rate": 5.983782790754623e-05,
      "loss": 11.9138,
      "step": 144
    },
    {
      "epoch": 0.060067835227714056,
      "grad_norm": 0.0385829322040081,
      "learning_rate": 5.786809309654982e-05,
      "loss": 11.9141,
      "step": 145
    },
    {
      "epoch": 0.06048209616031898,
      "grad_norm": 0.027011767029762268,
      "learning_rate": 5.592354622896944e-05,
      "loss": 11.9124,
      "step": 146
    },
    {
      "epoch": 0.0608963570929239,
      "grad_norm": 0.01691672019660473,
      "learning_rate": 5.40047189245025e-05,
      "loss": 11.9135,
      "step": 147
    },
    {
      "epoch": 0.06131061802552883,
      "grad_norm": 0.025425339117646217,
      "learning_rate": 5.211213577137469e-05,
      "loss": 11.9132,
      "step": 148
    },
    {
      "epoch": 0.06172487895813376,
      "grad_norm": 0.023960987105965614,
      "learning_rate": 5.024631418292274e-05,
      "loss": 11.9083,
      "step": 149
    },
    {
      "epoch": 0.06213913989073868,
      "grad_norm": 0.03383847326040268,
      "learning_rate": 4.840776425613886e-05,
      "loss": 11.9097,
      "step": 150
    },
    {
      "epoch": 0.06213913989073868,
      "eval_loss": 11.91579818725586,
      "eval_runtime": 0.2443,
      "eval_samples_per_second": 204.63,
      "eval_steps_per_second": 28.648,
      "step": 150
    },
    {
      "epoch": 0.0625534008233436,
      "grad_norm": 0.011331605724990368,
      "learning_rate": 4.659698863221513e-05,
      "loss": 11.9162,
      "step": 151
    },
    {
      "epoch": 0.06296766175594853,
      "grad_norm": 0.01841769553720951,
      "learning_rate": 4.481448235912671e-05,
      "loss": 11.9174,
      "step": 152
    },
    {
      "epoch": 0.06338192268855346,
      "grad_norm": 0.011241200380027294,
      "learning_rate": 4.306073275629044e-05,
      "loss": 11.9168,
      "step": 153
    },
    {
      "epoch": 0.06379618362115838,
      "grad_norm": 0.01512005552649498,
      "learning_rate": 4.133621928133665e-05,
      "loss": 11.9139,
      "step": 154
    },
    {
      "epoch": 0.0642104445537633,
      "grad_norm": 0.020751705393195152,
      "learning_rate": 3.964141339903026e-05,
      "loss": 11.9149,
      "step": 155
    },
    {
      "epoch": 0.06462470548636823,
      "grad_norm": 0.01683048903942108,
      "learning_rate": 3.797677845237696e-05,
      "loss": 11.9163,
      "step": 156
    },
    {
      "epoch": 0.06503896641897315,
      "grad_norm": 0.01599193550646305,
      "learning_rate": 3.634276953594982e-05,
      "loss": 11.9185,
      "step": 157
    },
    {
      "epoch": 0.06545322735157807,
      "grad_norm": 0.010102267377078533,
      "learning_rate": 3.473983337147118e-05,
      "loss": 11.9162,
      "step": 158
    },
    {
      "epoch": 0.065867488284183,
      "grad_norm": 0.015134149231016636,
      "learning_rate": 3.316840818568315e-05,
      "loss": 11.9156,
      "step": 159
    },
    {
      "epoch": 0.06628174921678792,
      "grad_norm": 0.018699511885643005,
      "learning_rate": 3.162892359054098e-05,
      "loss": 11.9171,
      "step": 160
    },
    {
      "epoch": 0.06669601014939285,
      "grad_norm": 0.016042305156588554,
      "learning_rate": 3.0121800465761293e-05,
      "loss": 11.9159,
      "step": 161
    },
    {
      "epoch": 0.06711027108199777,
      "grad_norm": 0.01940661109983921,
      "learning_rate": 2.8647450843757897e-05,
      "loss": 11.9143,
      "step": 162
    },
    {
      "epoch": 0.06752453201460269,
      "grad_norm": 0.018022019416093826,
      "learning_rate": 2.7206277796996144e-05,
      "loss": 11.9143,
      "step": 163
    },
    {
      "epoch": 0.06793879294720762,
      "grad_norm": 0.010029465891420841,
      "learning_rate": 2.5798675327796993e-05,
      "loss": 11.9146,
      "step": 164
    },
    {
      "epoch": 0.06835305387981255,
      "grad_norm": 0.01938021183013916,
      "learning_rate": 2.4425028260620715e-05,
      "loss": 11.9165,
      "step": 165
    },
    {
      "epoch": 0.06876731481241748,
      "grad_norm": 0.012679509818553925,
      "learning_rate": 2.3085712136859668e-05,
      "loss": 11.9142,
      "step": 166
    },
    {
      "epoch": 0.0691815757450224,
      "grad_norm": 0.014771286398172379,
      "learning_rate": 2.178109311216913e-05,
      "loss": 11.9161,
      "step": 167
    },
    {
      "epoch": 0.06959583667762732,
      "grad_norm": 0.014276309870183468,
      "learning_rate": 2.0511527856363912e-05,
      "loss": 11.9143,
      "step": 168
    },
    {
      "epoch": 0.07001009761023225,
      "grad_norm": 0.01856043189764023,
      "learning_rate": 1.927736345590839e-05,
      "loss": 11.9153,
      "step": 169
    },
    {
      "epoch": 0.07042435854283717,
      "grad_norm": 0.011656641028821468,
      "learning_rate": 1.8078937319026654e-05,
      "loss": 11.9139,
      "step": 170
    },
    {
      "epoch": 0.0708386194754421,
      "grad_norm": 0.010782111436128616,
      "learning_rate": 1.6916577083458228e-05,
      "loss": 11.9123,
      "step": 171
    },
    {
      "epoch": 0.07125288040804702,
      "grad_norm": 0.012261268682777882,
      "learning_rate": 1.579060052688548e-05,
      "loss": 11.9135,
      "step": 172
    },
    {
      "epoch": 0.07166714134065194,
      "grad_norm": 0.017579607665538788,
      "learning_rate": 1.4701315480056164e-05,
      "loss": 11.9138,
      "step": 173
    },
    {
      "epoch": 0.07208140227325686,
      "grad_norm": 0.010836518369615078,
      "learning_rate": 1.3649019742625623e-05,
      "loss": 11.9121,
      "step": 174
    },
    {
      "epoch": 0.07249566320586179,
      "grad_norm": 0.01106127630919218,
      "learning_rate": 1.2634001001741373e-05,
      "loss": 11.9147,
      "step": 175
    },
    {
      "epoch": 0.07249566320586179,
      "eval_loss": 11.915741920471191,
      "eval_runtime": 0.2454,
      "eval_samples_per_second": 203.783,
      "eval_steps_per_second": 28.53,
      "step": 175
    },
    {
      "epoch": 0.07290992413846671,
      "grad_norm": 0.015575578436255455,
      "learning_rate": 1.1656536753392287e-05,
      "loss": 11.9162,
      "step": 176
    },
    {
      "epoch": 0.07332418507107163,
      "grad_norm": 0.018252311274409294,
      "learning_rate": 1.0716894226543953e-05,
      "loss": 11.9174,
      "step": 177
    },
    {
      "epoch": 0.07373844600367657,
      "grad_norm": 0.020452966913580894,
      "learning_rate": 9.815330310080887e-06,
      "loss": 11.9149,
      "step": 178
    },
    {
      "epoch": 0.0741527069362815,
      "grad_norm": 0.019538022577762604,
      "learning_rate": 8.952091482575824e-06,
      "loss": 11.9159,
      "step": 179
    },
    {
      "epoch": 0.07456696786888642,
      "grad_norm": 0.016847524791955948,
      "learning_rate": 8.127413744904804e-06,
      "loss": 11.9151,
      "step": 180
    },
    {
      "epoch": 0.07498122880149134,
      "grad_norm": 0.01843886263668537,
      "learning_rate": 7.34152255572697e-06,
      "loss": 11.9139,
      "step": 181
    },
    {
      "epoch": 0.07539548973409627,
      "grad_norm": 0.019982196390628815,
      "learning_rate": 6.594632769846353e-06,
      "loss": 11.9164,
      "step": 182
    },
    {
      "epoch": 0.07580975066670119,
      "grad_norm": 0.021740185096859932,
      "learning_rate": 5.886948579472778e-06,
      "loss": 11.9154,
      "step": 183
    },
    {
      "epoch": 0.07622401159930611,
      "grad_norm": 0.016634231433272362,
      "learning_rate": 5.218663458397715e-06,
      "loss": 11.9136,
      "step": 184
    },
    {
      "epoch": 0.07663827253191104,
      "grad_norm": 0.020411711186170578,
      "learning_rate": 4.589960109100444e-06,
      "loss": 11.9147,
      "step": 185
    },
    {
      "epoch": 0.07705253346451596,
      "grad_norm": 0.01920330338180065,
      "learning_rate": 4.001010412799138e-06,
      "loss": 11.9133,
      "step": 186
    },
    {
      "epoch": 0.07746679439712088,
      "grad_norm": 0.011669190600514412,
      "learning_rate": 3.451975382460109e-06,
      "loss": 11.9145,
      "step": 187
    },
    {
      "epoch": 0.0778810553297258,
      "grad_norm": 0.01726020686328411,
      "learning_rate": 2.9430051187785962e-06,
      "loss": 11.916,
      "step": 188
    },
    {
      "epoch": 0.07829531626233073,
      "grad_norm": 0.018781933933496475,
      "learning_rate": 2.4742387691426445e-06,
      "loss": 11.9125,
      "step": 189
    },
    {
      "epoch": 0.07870957719493565,
      "grad_norm": 0.016961760818958282,
      "learning_rate": 2.0458044895916513e-06,
      "loss": 11.916,
      "step": 190
    },
    {
      "epoch": 0.07912383812754059,
      "grad_norm": 0.017457900568842888,
      "learning_rate": 1.6578194097797258e-06,
      "loss": 11.9143,
      "step": 191
    },
    {
      "epoch": 0.07953809906014551,
      "grad_norm": 0.014584069140255451,
      "learning_rate": 1.3103896009537207e-06,
      "loss": 11.9143,
      "step": 192
    },
    {
      "epoch": 0.07995235999275044,
      "grad_norm": 0.017070403322577477,
      "learning_rate": 1.0036100469542786e-06,
      "loss": 11.9154,
      "step": 193
    },
    {
      "epoch": 0.08036662092535536,
      "grad_norm": 0.017877068370580673,
      "learning_rate": 7.375646182482875e-07,
      "loss": 11.9087,
      "step": 194
    },
    {
      "epoch": 0.08078088185796028,
      "grad_norm": 0.01924355886876583,
      "learning_rate": 5.123260489995229e-07,
      "loss": 11.9126,
      "step": 195
    },
    {
      "epoch": 0.08119514279056521,
      "grad_norm": 0.03216341882944107,
      "learning_rate": 3.2795591718381975e-07,
      "loss": 11.9105,
      "step": 196
    },
    {
      "epoch": 0.08160940372317013,
      "grad_norm": 0.02557109296321869,
      "learning_rate": 1.8450462775428942e-07,
      "loss": 11.9097,
      "step": 197
    },
    {
      "epoch": 0.08202366465577506,
      "grad_norm": 0.02249646559357643,
      "learning_rate": 8.201139886109264e-08,
      "loss": 11.9116,
      "step": 198
    },
    {
      "epoch": 0.08243792558837998,
      "grad_norm": 0.031640082597732544,
      "learning_rate": 2.0504251129649374e-08,
      "loss": 11.9138,
      "step": 199
    },
    {
      "epoch": 0.0828521865209849,
      "grad_norm": 0.02583852969110012,
      "learning_rate": 0.0,
      "loss": 11.9111,
      "step": 200
    },
    {
      "epoch": 0.0828521865209849,
      "eval_loss": 11.915698051452637,
      "eval_runtime": 0.2451,
      "eval_samples_per_second": 204.03,
      "eval_steps_per_second": 28.564,
      "step": 200
    }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 1,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 318524620800.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}