|
{
  "best_metric": 0.08705329149961472,
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
  "epoch": 0.5673758865248227,
  "eval_steps": 25,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.005673758865248227,
      "grad_norm": 0.6567320823669434,
      "learning_rate": 2.9999999999999997e-05,
      "loss": 0.4071,
      "step": 1
    },
    {
      "epoch": 0.005673758865248227,
      "eval_loss": 0.22036334872245789,
      "eval_runtime": 6.6422,
      "eval_samples_per_second": 7.528,
      "eval_steps_per_second": 1.054,
      "step": 1
    },
    {
      "epoch": 0.011347517730496455,
      "grad_norm": 0.6732487678527832,
      "learning_rate": 5.9999999999999995e-05,
      "loss": 0.3639,
      "step": 2
    },
    {
      "epoch": 0.01702127659574468,
      "grad_norm": 0.5613159537315369,
      "learning_rate": 8.999999999999999e-05,
      "loss": 0.3219,
      "step": 3
    },
    {
      "epoch": 0.02269503546099291,
      "grad_norm": 0.45505204796791077,
      "learning_rate": 0.00011999999999999999,
      "loss": 0.3377,
      "step": 4
    },
    {
      "epoch": 0.028368794326241134,
      "grad_norm": 0.4392223358154297,
      "learning_rate": 0.00015,
      "loss": 0.3325,
      "step": 5
    },
    {
      "epoch": 0.03404255319148936,
      "grad_norm": 0.4434760510921478,
      "learning_rate": 0.00017999999999999998,
      "loss": 0.3476,
      "step": 6
    },
    {
      "epoch": 0.03971631205673759,
      "grad_norm": 0.2849961221218109,
      "learning_rate": 0.00020999999999999998,
      "loss": 0.2592,
      "step": 7
    },
    {
      "epoch": 0.04539007092198582,
      "grad_norm": 0.22268033027648926,
      "learning_rate": 0.00023999999999999998,
      "loss": 0.2355,
      "step": 8
    },
    {
      "epoch": 0.05106382978723404,
      "grad_norm": 0.6029117703437805,
      "learning_rate": 0.00027,
      "loss": 0.3223,
      "step": 9
    },
    {
      "epoch": 0.05673758865248227,
      "grad_norm": 0.543067216873169,
      "learning_rate": 0.0003,
      "loss": 0.2553,
      "step": 10
    },
    {
      "epoch": 0.062411347517730496,
      "grad_norm": 0.265033483505249,
      "learning_rate": 0.0002999794957488703,
      "loss": 0.2578,
      "step": 11
    },
    {
      "epoch": 0.06808510638297872,
      "grad_norm": 0.14402134716510773,
      "learning_rate": 0.0002999179886011389,
      "loss": 0.1828,
      "step": 12
    },
    {
      "epoch": 0.07375886524822695,
      "grad_norm": 0.1815207153558731,
      "learning_rate": 0.0002998154953722457,
      "loss": 0.2784,
      "step": 13
    },
    {
      "epoch": 0.07943262411347518,
      "grad_norm": 0.17301031947135925,
      "learning_rate": 0.00029967204408281613,
      "loss": 0.2375,
      "step": 14
    },
    {
      "epoch": 0.0851063829787234,
      "grad_norm": 0.1395721584558487,
      "learning_rate": 0.00029948767395100045,
      "loss": 0.1957,
      "step": 15
    },
    {
      "epoch": 0.09078014184397164,
      "grad_norm": 0.14996208250522614,
      "learning_rate": 0.0002992624353817517,
      "loss": 0.2279,
      "step": 16
    },
    {
      "epoch": 0.09645390070921986,
      "grad_norm": 0.1510487198829651,
      "learning_rate": 0.0002989963899530457,
      "loss": 0.1986,
      "step": 17
    },
    {
      "epoch": 0.10212765957446808,
      "grad_norm": 0.1490045040845871,
      "learning_rate": 0.00029868961039904624,
      "loss": 0.1931,
      "step": 18
    },
    {
      "epoch": 0.10780141843971631,
      "grad_norm": 0.17285487055778503,
      "learning_rate": 0.00029834218059022024,
      "loss": 0.1473,
      "step": 19
    },
    {
      "epoch": 0.11347517730496454,
      "grad_norm": 0.13048504292964935,
      "learning_rate": 0.00029795419551040833,
      "loss": 0.0971,
      "step": 20
    },
    {
      "epoch": 0.11914893617021277,
      "grad_norm": 0.12347353994846344,
      "learning_rate": 0.00029752576123085736,
      "loss": 0.0713,
      "step": 21
    },
    {
      "epoch": 0.12482269503546099,
      "grad_norm": 0.07937260717153549,
      "learning_rate": 0.0002970569948812214,
      "loss": 0.0287,
      "step": 22
    },
    {
      "epoch": 0.13049645390070921,
      "grad_norm": 0.08024411648511887,
      "learning_rate": 0.0002965480246175399,
      "loss": 0.0298,
      "step": 23
    },
    {
      "epoch": 0.13617021276595745,
      "grad_norm": 0.09828471392393112,
      "learning_rate": 0.0002959989895872009,
      "loss": 0.0294,
      "step": 24
    },
    {
      "epoch": 0.14184397163120568,
      "grad_norm": 0.09629218280315399,
      "learning_rate": 0.0002954100398908995,
      "loss": 0.0208,
      "step": 25
    },
    {
      "epoch": 0.14184397163120568,
      "eval_loss": 0.10909587889909744,
      "eval_runtime": 6.7173,
      "eval_samples_per_second": 7.443,
      "eval_steps_per_second": 1.042,
      "step": 25
    },
    {
      "epoch": 0.1475177304964539,
      "grad_norm": 0.0800151377916336,
      "learning_rate": 0.0002947813365416023,
      "loss": 0.0173,
      "step": 26
    },
    {
      "epoch": 0.15319148936170213,
      "grad_norm": 0.08075784146785736,
      "learning_rate": 0.0002941130514205272,
      "loss": 0.018,
      "step": 27
    },
    {
      "epoch": 0.15886524822695036,
      "grad_norm": 0.10134506225585938,
      "learning_rate": 0.0002934053672301536,
      "loss": 0.0199,
      "step": 28
    },
    {
      "epoch": 0.16453900709219857,
      "grad_norm": 0.07547775655984879,
      "learning_rate": 0.00029265847744427303,
      "loss": 0.0122,
      "step": 29
    },
    {
      "epoch": 0.1702127659574468,
      "grad_norm": 0.10290510952472687,
      "learning_rate": 0.00029187258625509513,
      "loss": 0.0143,
      "step": 30
    },
    {
      "epoch": 0.17588652482269504,
      "grad_norm": 0.1462346613407135,
      "learning_rate": 0.00029104790851742417,
      "loss": 0.0203,
      "step": 31
    },
    {
      "epoch": 0.18156028368794327,
      "grad_norm": 0.08281629532575607,
      "learning_rate": 0.0002901846696899191,
      "loss": 0.0166,
      "step": 32
    },
    {
      "epoch": 0.18723404255319148,
      "grad_norm": 0.09583016484975815,
      "learning_rate": 0.00028928310577345606,
      "loss": 0.0207,
      "step": 33
    },
    {
      "epoch": 0.19290780141843972,
      "grad_norm": 0.08298071473836899,
      "learning_rate": 0.0002883434632466077,
      "loss": 0.0135,
      "step": 34
    },
    {
      "epoch": 0.19858156028368795,
      "grad_norm": 0.12078027427196503,
      "learning_rate": 0.00028736599899825856,
      "loss": 0.014,
      "step": 35
    },
    {
      "epoch": 0.20425531914893616,
      "grad_norm": 0.07853731513023376,
      "learning_rate": 0.00028635098025737434,
      "loss": 0.0129,
      "step": 36
    },
    {
      "epoch": 0.2099290780141844,
      "grad_norm": 0.08032602816820145,
      "learning_rate": 0.00028529868451994384,
      "loss": 0.0125,
      "step": 37
    },
    {
      "epoch": 0.21560283687943263,
      "grad_norm": 0.07942873239517212,
      "learning_rate": 0.0002842093994731145,
      "loss": 0.0127,
      "step": 38
    },
    {
      "epoch": 0.22127659574468084,
      "grad_norm": 0.11203460395336151,
      "learning_rate": 0.00028308342291654174,
      "loss": 0.015,
      "step": 39
    },
    {
      "epoch": 0.22695035460992907,
      "grad_norm": 0.10632278770208359,
      "learning_rate": 0.00028192106268097334,
      "loss": 0.0168,
      "step": 40
    },
    {
      "epoch": 0.2326241134751773,
      "grad_norm": 0.12105297297239304,
      "learning_rate": 0.00028072263654409154,
      "loss": 0.0148,
      "step": 41
    },
    {
      "epoch": 0.23829787234042554,
      "grad_norm": 0.16477319598197937,
      "learning_rate": 0.0002794884721436361,
      "loss": 0.0224,
      "step": 42
    },
    {
      "epoch": 0.24397163120567375,
      "grad_norm": 0.20291352272033691,
      "learning_rate": 0.00027821890688783083,
      "loss": 0.0339,
      "step": 43
    },
    {
      "epoch": 0.24964539007092199,
      "grad_norm": 0.5795202851295471,
      "learning_rate": 0.0002769142878631403,
      "loss": 0.0607,
      "step": 44
    },
    {
      "epoch": 0.2553191489361702,
      "grad_norm": 0.3015036880970001,
      "learning_rate": 0.00027557497173937923,
      "loss": 0.2348,
      "step": 45
    },
    {
      "epoch": 0.26099290780141843,
      "grad_norm": 0.21086736023426056,
      "learning_rate": 0.000274201324672203,
      "loss": 0.1987,
      "step": 46
    },
    {
      "epoch": 0.26666666666666666,
      "grad_norm": 0.1581435650587082,
      "learning_rate": 0.00027279372220300385,
      "loss": 0.1761,
      "step": 47
    },
    {
      "epoch": 0.2723404255319149,
      "grad_norm": 0.17156659066677094,
      "learning_rate": 0.0002713525491562421,
      "loss": 0.2314,
      "step": 48
    },
    {
      "epoch": 0.27801418439716313,
      "grad_norm": 0.15111802518367767,
      "learning_rate": 0.00026987819953423867,
      "loss": 0.1813,
      "step": 49
    },
    {
      "epoch": 0.28368794326241137,
      "grad_norm": 0.1316436380147934,
      "learning_rate": 0.00026837107640945905,
      "loss": 0.1325,
      "step": 50
    },
    {
      "epoch": 0.28368794326241137,
      "eval_loss": 0.09500084072351456,
      "eval_runtime": 6.6628,
      "eval_samples_per_second": 7.504,
      "eval_steps_per_second": 1.051,
      "step": 50
    },
    {
      "epoch": 0.28936170212765955,
      "grad_norm": 0.15596741437911987,
      "learning_rate": 0.0002668315918143169,
      "loss": 0.2319,
      "step": 51
    },
    {
      "epoch": 0.2950354609929078,
      "grad_norm": 0.15342959761619568,
      "learning_rate": 0.00026526016662852886,
      "loss": 0.2409,
      "step": 52
    },
    {
      "epoch": 0.300709219858156,
      "grad_norm": 0.5929403901100159,
      "learning_rate": 0.00026365723046405023,
      "loss": 0.1871,
      "step": 53
    },
    {
      "epoch": 0.30638297872340425,
      "grad_norm": 0.1430080085992813,
      "learning_rate": 0.0002620232215476231,
      "loss": 0.2278,
      "step": 54
    },
    {
      "epoch": 0.3120567375886525,
      "grad_norm": 0.2504333555698395,
      "learning_rate": 0.0002603585866009697,
      "loss": 0.2109,
      "step": 55
    },
    {
      "epoch": 0.3177304964539007,
      "grad_norm": 0.36393240094184875,
      "learning_rate": 0.00025866378071866334,
      "loss": 0.1844,
      "step": 56
    },
    {
      "epoch": 0.32340425531914896,
      "grad_norm": 0.13066324591636658,
      "learning_rate": 0.00025693926724370956,
      "loss": 0.2173,
      "step": 57
    },
    {
      "epoch": 0.32907801418439714,
      "grad_norm": 0.13164402544498444,
      "learning_rate": 0.00025518551764087326,
      "loss": 0.2108,
      "step": 58
    },
    {
      "epoch": 0.3347517730496454,
      "grad_norm": 0.14740315079689026,
      "learning_rate": 0.00025340301136778483,
      "loss": 0.2271,
      "step": 59
    },
    {
      "epoch": 0.3404255319148936,
      "grad_norm": 0.15507878363132477,
      "learning_rate": 0.00025159223574386114,
      "loss": 0.2442,
      "step": 60
    },
    {
      "epoch": 0.34609929078014184,
      "grad_norm": 0.13118167221546173,
      "learning_rate": 0.0002497536858170772,
      "loss": 0.1877,
      "step": 61
    },
    {
      "epoch": 0.3517730496453901,
      "grad_norm": 0.14413724839687347,
      "learning_rate": 0.00024788786422862526,
      "loss": 0.214,
      "step": 62
    },
    {
      "epoch": 0.3574468085106383,
      "grad_norm": 0.11405673623085022,
      "learning_rate": 0.00024599528107549745,
      "loss": 0.0906,
      "step": 63
    },
    {
      "epoch": 0.36312056737588655,
      "grad_norm": 0.10695318877696991,
      "learning_rate": 0.00024407645377103054,
      "loss": 0.0675,
      "step": 64
    },
    {
      "epoch": 0.36879432624113473,
      "grad_norm": 0.09675230830907822,
      "learning_rate": 0.00024213190690345018,
      "loss": 0.0587,
      "step": 65
    },
    {
      "epoch": 0.37446808510638296,
      "grad_norm": 0.09902877360582352,
      "learning_rate": 0.00024016217209245374,
      "loss": 0.0707,
      "step": 66
    },
    {
      "epoch": 0.3801418439716312,
      "grad_norm": 0.0778760313987732,
      "learning_rate": 0.00023816778784387094,
      "loss": 0.0319,
      "step": 67
    },
    {
      "epoch": 0.38581560283687943,
      "grad_norm": 0.06342566013336182,
      "learning_rate": 0.0002361492994024415,
      "loss": 0.0235,
      "step": 68
    },
    {
      "epoch": 0.39148936170212767,
      "grad_norm": 0.0666981041431427,
      "learning_rate": 0.0002341072586027509,
      "loss": 0.0202,
      "step": 69
    },
    {
      "epoch": 0.3971631205673759,
      "grad_norm": 0.08664432168006897,
      "learning_rate": 0.00023204222371836405,
      "loss": 0.017,
      "step": 70
    },
    {
      "epoch": 0.40283687943262414,
      "grad_norm": 0.09265201538801193,
      "learning_rate": 0.00022995475930919905,
      "loss": 0.0091,
      "step": 71
    },
    {
      "epoch": 0.4085106382978723,
      "grad_norm": 0.06965811550617218,
      "learning_rate": 0.00022784543606718227,
      "loss": 0.0098,
      "step": 72
    },
    {
      "epoch": 0.41418439716312055,
      "grad_norm": 0.0695430338382721,
      "learning_rate": 0.00022571483066022657,
      "loss": 0.0092,
      "step": 73
    },
    {
      "epoch": 0.4198581560283688,
      "grad_norm": 0.08358810096979141,
      "learning_rate": 0.0002235635255745762,
      "loss": 0.0121,
      "step": 74
    },
    {
      "epoch": 0.425531914893617,
      "grad_norm": 0.059993814677000046,
      "learning_rate": 0.00022139210895556104,
      "loss": 0.0082,
      "step": 75
    },
    {
      "epoch": 0.425531914893617,
      "eval_loss": 0.09436039626598358,
      "eval_runtime": 6.6489,
      "eval_samples_per_second": 7.52,
      "eval_steps_per_second": 1.053,
      "step": 75
    },
    {
      "epoch": 0.43120567375886526,
      "grad_norm": 0.09175027161836624,
      "learning_rate": 0.00021920117444680317,
      "loss": 0.0084,
      "step": 76
    },
    {
      "epoch": 0.4368794326241135,
      "grad_norm": 0.06123727932572365,
      "learning_rate": 0.00021699132102792097,
      "loss": 0.0094,
      "step": 77
    },
    {
      "epoch": 0.4425531914893617,
      "grad_norm": 0.11089751869440079,
      "learning_rate": 0.0002147631528507739,
      "loss": 0.0071,
      "step": 78
    },
    {
      "epoch": 0.4482269503546099,
      "grad_norm": 0.06789694726467133,
      "learning_rate": 0.00021251727907429355,
      "loss": 0.0073,
      "step": 79
    },
    {
      "epoch": 0.45390070921985815,
      "grad_norm": 0.06642229110002518,
      "learning_rate": 0.0002102543136979454,
      "loss": 0.0068,
      "step": 80
    },
    {
      "epoch": 0.4595744680851064,
      "grad_norm": 0.0795132964849472,
      "learning_rate": 0.0002079748753938678,
      "loss": 0.0051,
      "step": 81
    },
    {
      "epoch": 0.4652482269503546,
      "grad_norm": 0.11351794749498367,
      "learning_rate": 0.0002056795873377331,
      "loss": 0.0108,
      "step": 82
    },
    {
      "epoch": 0.47092198581560285,
      "grad_norm": 0.07391192764043808,
      "learning_rate": 0.00020336907703837748,
      "loss": 0.0102,
      "step": 83
    },
    {
      "epoch": 0.4765957446808511,
      "grad_norm": 0.08752062171697617,
      "learning_rate": 0.00020104397616624645,
      "loss": 0.0074,
      "step": 84
    },
    {
      "epoch": 0.48226950354609927,
      "grad_norm": 0.10944165289402008,
      "learning_rate": 0.00019870492038070252,
      "loss": 0.01,
      "step": 85
    },
    {
      "epoch": 0.4879432624113475,
      "grad_norm": 0.13292644917964935,
      "learning_rate": 0.0001963525491562421,
      "loss": 0.0152,
      "step": 86
    },
    {
      "epoch": 0.49361702127659574,
      "grad_norm": 0.16543662548065186,
      "learning_rate": 0.0001939875056076697,
      "loss": 0.0175,
      "step": 87
    },
    {
      "epoch": 0.49929078014184397,
      "grad_norm": 0.4206661880016327,
      "learning_rate": 0.00019161043631427666,
      "loss": 0.0632,
      "step": 88
    },
    {
      "epoch": 0.5049645390070922,
      "grad_norm": 0.2616719901561737,
      "learning_rate": 0.00018922199114307294,
      "loss": 0.1991,
      "step": 89
    },
    {
      "epoch": 0.5106382978723404,
      "grad_norm": 0.2395102083683014,
      "learning_rate": 0.00018682282307111987,
      "loss": 0.2178,
      "step": 90
    },
    {
      "epoch": 0.5163120567375886,
      "grad_norm": 0.16154062747955322,
      "learning_rate": 0.00018441358800701273,
      "loss": 0.1978,
      "step": 91
    },
    {
      "epoch": 0.5219858156028369,
      "grad_norm": 0.13820257782936096,
      "learning_rate": 0.00018199494461156203,
      "loss": 0.1875,
      "step": 92
    },
    {
      "epoch": 0.5276595744680851,
      "grad_norm": 0.15515460073947906,
      "learning_rate": 0.000179567554117722,
      "loss": 0.2294,
      "step": 93
    },
    {
      "epoch": 0.5333333333333333,
      "grad_norm": 0.14044955372810364,
      "learning_rate": 0.00017713208014981648,
      "loss": 0.1898,
      "step": 94
    },
    {
      "epoch": 0.5390070921985816,
      "grad_norm": 0.1499217450618744,
      "learning_rate": 0.00017468918854211007,
      "loss": 0.208,
      "step": 95
    },
    {
      "epoch": 0.5446808510638298,
      "grad_norm": 0.1417035013437271,
      "learning_rate": 0.00017223954715677627,
      "loss": 0.2009,
      "step": 96
    },
    {
      "epoch": 0.550354609929078,
      "grad_norm": 0.12359917163848877,
      "learning_rate": 0.00016978382570131034,
      "loss": 0.1748,
      "step": 97
    },
    {
      "epoch": 0.5560283687943263,
      "grad_norm": 0.1188841164112091,
      "learning_rate": 0.00016732269554543794,
      "loss": 0.1583,
      "step": 98
    },
    {
      "epoch": 0.5617021276595745,
      "grad_norm": 0.1371552050113678,
      "learning_rate": 0.00016485682953756942,
      "loss": 0.1704,
      "step": 99
    },
    {
      "epoch": 0.5673758865248227,
      "grad_norm": 0.14197076857089996,
      "learning_rate": 0.00016238690182084986,
      "loss": 0.1849,
      "step": 100
    },
    {
      "epoch": 0.5673758865248227,
      "eval_loss": 0.08705329149961472,
      "eval_runtime": 6.6626,
      "eval_samples_per_second": 7.505,
      "eval_steps_per_second": 1.051,
      "step": 100
    }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 1,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 7.60950229649326e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}
|
|