{
  "best_metric": 11.915943145751953,
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
  "epoch": 0.04142609326049245,
  "eval_steps": 25,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0004142609326049245,
      "grad_norm": 0.009111136198043823,
      "learning_rate": 2.9999999999999997e-05,
      "loss": 11.9291,
      "step": 1
    },
    {
      "epoch": 0.0004142609326049245,
      "eval_loss": 11.93281364440918,
      "eval_runtime": 0.2456,
      "eval_samples_per_second": 203.617,
      "eval_steps_per_second": 28.506,
      "step": 1
    },
    {
      "epoch": 0.000828521865209849,
      "grad_norm": 0.01578676700592041,
      "learning_rate": 5.9999999999999995e-05,
      "loss": 11.9311,
      "step": 2
    },
    {
      "epoch": 0.0012427827978147737,
      "grad_norm": 0.010788201354444027,
      "learning_rate": 8.999999999999999e-05,
      "loss": 11.9303,
      "step": 3
    },
    {
      "epoch": 0.001657043730419698,
      "grad_norm": 0.014519501477479935,
      "learning_rate": 0.00011999999999999999,
      "loss": 11.9288,
      "step": 4
    },
    {
      "epoch": 0.0020713046630246226,
      "grad_norm": 0.01510781329125166,
      "learning_rate": 0.00015,
      "loss": 11.9304,
      "step": 5
    },
    {
      "epoch": 0.0024855655956295473,
      "grad_norm": 0.013269297778606415,
      "learning_rate": 0.00017999999999999998,
      "loss": 11.9294,
      "step": 6
    },
    {
      "epoch": 0.0028998265282344717,
      "grad_norm": 0.013751151971518993,
      "learning_rate": 0.00020999999999999998,
      "loss": 11.9321,
      "step": 7
    },
    {
      "epoch": 0.003314087460839396,
      "grad_norm": 0.015573951415717602,
      "learning_rate": 0.00023999999999999998,
      "loss": 11.9287,
      "step": 8
    },
    {
      "epoch": 0.0037283483934443208,
      "grad_norm": 0.009504775516688824,
      "learning_rate": 0.00027,
      "loss": 11.9305,
      "step": 9
    },
    {
      "epoch": 0.004142609326049245,
      "grad_norm": 0.010801173746585846,
      "learning_rate": 0.0003,
      "loss": 11.9337,
      "step": 10
    },
    {
      "epoch": 0.0045568702586541695,
      "grad_norm": 0.011178042739629745,
      "learning_rate": 0.0002999794957488703,
      "loss": 11.9278,
      "step": 11
    },
    {
      "epoch": 0.004971131191259095,
      "grad_norm": 0.014659388922154903,
      "learning_rate": 0.0002999179886011389,
      "loss": 11.9297,
      "step": 12
    },
    {
      "epoch": 0.005385392123864019,
      "grad_norm": 0.013904299587011337,
      "learning_rate": 0.0002998154953722457,
      "loss": 11.9315,
      "step": 13
    },
    {
      "epoch": 0.005799653056468943,
      "grad_norm": 0.014901366084814072,
      "learning_rate": 0.00029967204408281613,
      "loss": 11.9306,
      "step": 14
    },
    {
      "epoch": 0.006213913989073868,
      "grad_norm": 0.014373673126101494,
      "learning_rate": 0.00029948767395100045,
      "loss": 11.9291,
      "step": 15
    },
    {
      "epoch": 0.006628174921678792,
      "grad_norm": 0.012232257053256035,
      "learning_rate": 0.0002992624353817517,
      "loss": 11.9292,
      "step": 16
    },
    {
      "epoch": 0.007042435854283717,
      "grad_norm": 0.011413573287427425,
      "learning_rate": 0.0002989963899530457,
      "loss": 11.9322,
      "step": 17
    },
    {
      "epoch": 0.0074566967868886416,
      "grad_norm": 0.016656706109642982,
      "learning_rate": 0.00029868961039904624,
      "loss": 11.9324,
      "step": 18
    },
    {
      "epoch": 0.007870957719493567,
      "grad_norm": 0.014373266138136387,
      "learning_rate": 0.00029834218059022024,
      "loss": 11.9318,
      "step": 19
    },
    {
      "epoch": 0.00828521865209849,
      "grad_norm": 0.015115369111299515,
      "learning_rate": 0.00029795419551040833,
      "loss": 11.9287,
      "step": 20
    },
    {
      "epoch": 0.008699479584703415,
      "grad_norm": 0.01734703592956066,
      "learning_rate": 0.00029752576123085736,
      "loss": 11.9309,
      "step": 21
    },
    {
      "epoch": 0.009113740517308339,
      "grad_norm": 0.014644989743828773,
      "learning_rate": 0.0002970569948812214,
      "loss": 11.9254,
      "step": 22
    },
    {
      "epoch": 0.009528001449913264,
      "grad_norm": 0.021497027948498726,
      "learning_rate": 0.0002965480246175399,
      "loss": 11.9302,
      "step": 23
    },
    {
      "epoch": 0.00994226238251819,
      "grad_norm": 0.01817285642027855,
      "learning_rate": 0.0002959989895872009,
      "loss": 11.9312,
      "step": 24
    },
    {
      "epoch": 0.010356523315123113,
      "grad_norm": 0.02207845263183117,
      "learning_rate": 0.0002954100398908995,
      "loss": 11.9284,
      "step": 25
    },
    {
      "epoch": 0.010356523315123113,
      "eval_loss": 11.93165397644043,
      "eval_runtime": 0.2441,
      "eval_samples_per_second": 204.847,
      "eval_steps_per_second": 28.679,
      "step": 25
    },
    {
      "epoch": 0.010770784247728038,
      "grad_norm": 0.017103340476751328,
      "learning_rate": 0.0002947813365416023,
      "loss": 11.9297,
      "step": 26
    },
    {
      "epoch": 0.011185045180332961,
      "grad_norm": 0.0265261959284544,
      "learning_rate": 0.0002941130514205272,
      "loss": 11.9303,
      "step": 27
    },
    {
      "epoch": 0.011599306112937887,
      "grad_norm": 0.031121160835027695,
      "learning_rate": 0.0002934053672301536,
      "loss": 11.9264,
      "step": 28
    },
    {
      "epoch": 0.012013567045542812,
      "grad_norm": 0.020733291283249855,
      "learning_rate": 0.00029265847744427303,
      "loss": 11.9311,
      "step": 29
    },
    {
      "epoch": 0.012427827978147735,
      "grad_norm": 0.019345130771398544,
      "learning_rate": 0.00029187258625509513,
      "loss": 11.9295,
      "step": 30
    },
    {
      "epoch": 0.01284208891075266,
      "grad_norm": 0.025070849806070328,
      "learning_rate": 0.00029104790851742417,
      "loss": 11.9307,
      "step": 31
    },
    {
      "epoch": 0.013256349843357584,
      "grad_norm": 0.02109096758067608,
      "learning_rate": 0.0002901846696899191,
      "loss": 11.927,
      "step": 32
    },
    {
      "epoch": 0.01367061077596251,
      "grad_norm": 0.027676530182361603,
      "learning_rate": 0.00028928310577345606,
      "loss": 11.9296,
      "step": 33
    },
    {
      "epoch": 0.014084871708567434,
      "grad_norm": 0.02774973399937153,
      "learning_rate": 0.0002883434632466077,
      "loss": 11.9294,
      "step": 34
    },
    {
      "epoch": 0.014499132641172358,
      "grad_norm": 0.026240255683660507,
      "learning_rate": 0.00028736599899825856,
      "loss": 11.9283,
      "step": 35
    },
    {
      "epoch": 0.014913393573777283,
      "grad_norm": 0.03300342708826065,
      "learning_rate": 0.00028635098025737434,
      "loss": 11.9277,
      "step": 36
    },
    {
      "epoch": 0.015327654506382208,
      "grad_norm": 0.03084516152739525,
      "learning_rate": 0.00028529868451994384,
      "loss": 11.9284,
      "step": 37
    },
    {
      "epoch": 0.015741915438987134,
      "grad_norm": 0.03772755712270737,
      "learning_rate": 0.0002842093994731145,
      "loss": 11.9301,
      "step": 38
    },
    {
      "epoch": 0.016156176371592057,
      "grad_norm": 0.044647328555583954,
      "learning_rate": 0.00028308342291654174,
      "loss": 11.929,
      "step": 39
    },
    {
      "epoch": 0.01657043730419698,
      "grad_norm": 0.0458788201212883,
      "learning_rate": 0.00028192106268097334,
      "loss": 11.9261,
      "step": 40
    },
    {
      "epoch": 0.016984698236801904,
      "grad_norm": 0.062342319637537,
      "learning_rate": 0.00028072263654409154,
      "loss": 11.9292,
      "step": 41
    },
    {
      "epoch": 0.01739895916940683,
      "grad_norm": 0.04045253247022629,
      "learning_rate": 0.0002794884721436361,
      "loss": 11.9276,
      "step": 42
    },
    {
      "epoch": 0.017813220102011754,
      "grad_norm": 0.05095702409744263,
      "learning_rate": 0.00027821890688783083,
      "loss": 11.9244,
      "step": 43
    },
    {
      "epoch": 0.018227481034616678,
      "grad_norm": 0.04911315068602562,
      "learning_rate": 0.0002769142878631403,
      "loss": 11.9264,
      "step": 44
    },
    {
      "epoch": 0.018641741967221605,
      "grad_norm": 0.04607488214969635,
      "learning_rate": 0.00027557497173937923,
      "loss": 11.9244,
      "step": 45
    },
    {
      "epoch": 0.019056002899826528,
      "grad_norm": 0.05658293142914772,
      "learning_rate": 0.000274201324672203,
      "loss": 11.9224,
      "step": 46
    },
    {
      "epoch": 0.01947026383243145,
      "grad_norm": 0.050977252423763275,
      "learning_rate": 0.00027279372220300385,
      "loss": 11.9255,
      "step": 47
    },
    {
      "epoch": 0.01988452476503638,
      "grad_norm": 0.06175487861037254,
      "learning_rate": 0.0002713525491562421,
      "loss": 11.9218,
      "step": 48
    },
    {
      "epoch": 0.020298785697641302,
      "grad_norm": 0.049341753125190735,
      "learning_rate": 0.00026987819953423867,
      "loss": 11.927,
      "step": 49
    },
    {
      "epoch": 0.020713046630246226,
      "grad_norm": 0.0748741626739502,
      "learning_rate": 0.00026837107640945905,
      "loss": 11.9258,
      "step": 50
    },
    {
      "epoch": 0.020713046630246226,
      "eval_loss": 11.926169395446777,
      "eval_runtime": 0.2445,
      "eval_samples_per_second": 204.526,
      "eval_steps_per_second": 28.634,
      "step": 50
    },
    {
      "epoch": 0.021127307562851153,
      "grad_norm": 0.06076189503073692,
      "learning_rate": 0.0002668315918143169,
      "loss": 11.9242,
      "step": 51
    },
    {
      "epoch": 0.021541568495456076,
      "grad_norm": 0.0622175894677639,
      "learning_rate": 0.00026526016662852886,
      "loss": 11.9228,
      "step": 52
    },
    {
      "epoch": 0.021955829428061,
      "grad_norm": 0.05812002345919609,
      "learning_rate": 0.00026365723046405023,
      "loss": 11.9241,
      "step": 53
    },
    {
      "epoch": 0.022370090360665923,
      "grad_norm": 0.05627156049013138,
      "learning_rate": 0.0002620232215476231,
      "loss": 11.9206,
      "step": 54
    },
    {
      "epoch": 0.02278435129327085,
      "grad_norm": 0.04959167540073395,
      "learning_rate": 0.0002603585866009697,
      "loss": 11.9216,
      "step": 55
    },
    {
      "epoch": 0.023198612225875773,
      "grad_norm": 0.05402550846338272,
      "learning_rate": 0.00025866378071866334,
      "loss": 11.9216,
      "step": 56
    },
    {
      "epoch": 0.023612873158480697,
      "grad_norm": 0.049663037061691284,
      "learning_rate": 0.00025693926724370956,
      "loss": 11.9207,
      "step": 57
    },
    {
      "epoch": 0.024027134091085624,
      "grad_norm": 0.045585885643959045,
      "learning_rate": 0.00025518551764087326,
      "loss": 11.9232,
      "step": 58
    },
    {
      "epoch": 0.024441395023690547,
      "grad_norm": 0.05635461211204529,
      "learning_rate": 0.00025340301136778483,
      "loss": 11.922,
      "step": 59
    },
    {
      "epoch": 0.02485565595629547,
      "grad_norm": 0.05952185392379761,
      "learning_rate": 0.00025159223574386114,
      "loss": 11.9209,
      "step": 60
    },
    {
      "epoch": 0.025269916888900398,
      "grad_norm": 0.04048493504524231,
      "learning_rate": 0.0002497536858170772,
      "loss": 11.9213,
      "step": 61
    },
    {
      "epoch": 0.02568417782150532,
      "grad_norm": 0.050521329045295715,
      "learning_rate": 0.00024788786422862526,
      "loss": 11.9192,
      "step": 62
    },
    {
      "epoch": 0.026098438754110245,
      "grad_norm": 0.043353911489248276,
      "learning_rate": 0.00024599528107549745,
      "loss": 11.9202,
      "step": 63
    },
    {
      "epoch": 0.026512699686715168,
      "grad_norm": 0.0434187576174736,
      "learning_rate": 0.00024407645377103054,
      "loss": 11.9171,
      "step": 64
    },
    {
      "epoch": 0.026926960619320095,
      "grad_norm": 0.0419292189180851,
      "learning_rate": 0.00024213190690345018,
      "loss": 11.9196,
      "step": 65
    },
    {
      "epoch": 0.02734122155192502,
      "grad_norm": 0.04601728916168213,
      "learning_rate": 0.00024016217209245374,
      "loss": 11.9174,
      "step": 66
    },
    {
      "epoch": 0.027755482484529942,
      "grad_norm": 0.047061558812856674,
      "learning_rate": 0.00023816778784387094,
      "loss": 11.9189,
      "step": 67
    },
    {
      "epoch": 0.02816974341713487,
      "grad_norm": 0.039436087012290955,
      "learning_rate": 0.0002361492994024415,
      "loss": 11.9178,
      "step": 68
    },
    {
      "epoch": 0.028584004349739792,
      "grad_norm": 0.03027687780559063,
      "learning_rate": 0.0002341072586027509,
      "loss": 11.9175,
      "step": 69
    },
    {
      "epoch": 0.028998265282344716,
      "grad_norm": 0.03565209358930588,
      "learning_rate": 0.00023204222371836405,
      "loss": 11.9168,
      "step": 70
    },
    {
      "epoch": 0.029412526214949643,
      "grad_norm": 0.02530493400990963,
      "learning_rate": 0.00022995475930919905,
      "loss": 11.9163,
      "step": 71
    },
    {
      "epoch": 0.029826787147554566,
      "grad_norm": 0.0354970321059227,
      "learning_rate": 0.00022784543606718227,
      "loss": 11.9154,
      "step": 72
    },
    {
      "epoch": 0.03024104808015949,
      "grad_norm": 0.018435046076774597,
      "learning_rate": 0.00022571483066022657,
      "loss": 11.9158,
      "step": 73
    },
    {
      "epoch": 0.030655309012764417,
      "grad_norm": 0.02600877545773983,
      "learning_rate": 0.0002235635255745762,
      "loss": 11.9121,
      "step": 74
    },
    {
      "epoch": 0.03106956994536934,
      "grad_norm": 0.032639019191265106,
      "learning_rate": 0.00022139210895556104,
      "loss": 11.9143,
      "step": 75
    },
    {
      "epoch": 0.03106956994536934,
      "eval_loss": 11.916912078857422,
      "eval_runtime": 0.2447,
      "eval_samples_per_second": 204.352,
      "eval_steps_per_second": 28.609,
      "step": 75
    },
    {
      "epoch": 0.03148383087797427,
      "grad_norm": 0.026764797046780586,
      "learning_rate": 0.00021920117444680317,
      "loss": 11.9196,
      "step": 76
    },
    {
      "epoch": 0.03189809181057919,
      "grad_norm": 0.0315483957529068,
      "learning_rate": 0.00021699132102792097,
      "loss": 11.9159,
      "step": 77
    },
    {
      "epoch": 0.032312352743184114,
      "grad_norm": 0.029291000217199326,
      "learning_rate": 0.0002147631528507739,
      "loss": 11.9156,
      "step": 78
    },
    {
      "epoch": 0.03272661367578904,
      "grad_norm": 0.02186393365263939,
      "learning_rate": 0.00021251727907429355,
      "loss": 11.9126,
      "step": 79
    },
    {
      "epoch": 0.03314087460839396,
      "grad_norm": 0.02193310484290123,
      "learning_rate": 0.0002102543136979454,
      "loss": 11.9144,
      "step": 80
    },
    {
      "epoch": 0.033555135540998884,
      "grad_norm": 0.027057521045207977,
      "learning_rate": 0.0002079748753938678,
      "loss": 11.9136,
      "step": 81
    },
    {
      "epoch": 0.03396939647360381,
      "grad_norm": 0.028718745335936546,
      "learning_rate": 0.0002056795873377331,
      "loss": 11.9155,
      "step": 82
    },
    {
      "epoch": 0.03438365740620874,
      "grad_norm": 0.03541611507534981,
      "learning_rate": 0.00020336907703837748,
      "loss": 11.9161,
      "step": 83
    },
    {
      "epoch": 0.03479791833881366,
      "grad_norm": 0.027952907606959343,
      "learning_rate": 0.00020104397616624645,
      "loss": 11.9165,
      "step": 84
    },
    {
      "epoch": 0.035212179271418585,
      "grad_norm": 0.02944616600871086,
      "learning_rate": 0.00019870492038070252,
      "loss": 11.9162,
      "step": 85
    },
    {
      "epoch": 0.03562644020402351,
      "grad_norm": 0.028091363608837128,
      "learning_rate": 0.0001963525491562421,
      "loss": 11.9145,
      "step": 86
    },
    {
      "epoch": 0.03604070113662843,
      "grad_norm": 0.01949743553996086,
      "learning_rate": 0.0001939875056076697,
      "loss": 11.9126,
      "step": 87
    },
    {
      "epoch": 0.036454962069233356,
      "grad_norm": 0.03287213295698166,
      "learning_rate": 0.00019161043631427666,
      "loss": 11.9162,
      "step": 88
    },
    {
      "epoch": 0.036869223001838286,
      "grad_norm": 0.024355346336960793,
      "learning_rate": 0.00018922199114307294,
      "loss": 11.9142,
      "step": 89
    },
    {
      "epoch": 0.03728348393444321,
      "grad_norm": 0.02432161383330822,
      "learning_rate": 0.00018682282307111987,
      "loss": 11.9123,
      "step": 90
    },
    {
      "epoch": 0.03769774486704813,
      "grad_norm": 0.02186986431479454,
      "learning_rate": 0.00018441358800701273,
      "loss": 11.9155,
      "step": 91
    },
    {
      "epoch": 0.038112005799653056,
      "grad_norm": 0.02595347724854946,
      "learning_rate": 0.00018199494461156203,
      "loss": 11.9145,
      "step": 92
    },
    {
      "epoch": 0.03852626673225798,
      "grad_norm": 0.01945437304675579,
      "learning_rate": 0.000179567554117722,
      "loss": 11.9142,
      "step": 93
    },
    {
      "epoch": 0.0389405276648629,
      "grad_norm": 0.034285496920347214,
      "learning_rate": 0.00017713208014981648,
      "loss": 11.9142,
      "step": 94
    },
    {
      "epoch": 0.03935478859746783,
      "grad_norm": 0.019000215455889702,
      "learning_rate": 0.00017468918854211007,
      "loss": 11.9144,
      "step": 95
    },
    {
      "epoch": 0.03976904953007276,
      "grad_norm": 0.027748456224799156,
      "learning_rate": 0.00017223954715677627,
      "loss": 11.9086,
      "step": 96
    },
    {
      "epoch": 0.04018331046267768,
      "grad_norm": 0.02137632481753826,
      "learning_rate": 0.00016978382570131034,
      "loss": 11.9144,
      "step": 97
    },
    {
      "epoch": 0.040597571395282604,
      "grad_norm": 0.02740846946835518,
      "learning_rate": 0.00016732269554543794,
      "loss": 11.9143,
      "step": 98
    },
    {
      "epoch": 0.04101183232788753,
      "grad_norm": 0.02057352475821972,
      "learning_rate": 0.00016485682953756942,
      "loss": 11.912,
      "step": 99
    },
    {
      "epoch": 0.04142609326049245,
      "grad_norm": 0.036128297448158264,
      "learning_rate": 0.00016238690182084986,
      "loss": 11.9097,
      "step": 100
    },
    {
      "epoch": 0.04142609326049245,
      "eval_loss": 11.915943145751953,
      "eval_runtime": 0.2452,
      "eval_samples_per_second": 203.942,
      "eval_steps_per_second": 28.552,
      "step": 100
    }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 1,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 159262310400.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}