{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9991004497751125,
  "eval_steps": 500,
  "global_step": 833,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0011994002998500749,
      "grad_norm": 0.11149752140045166,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 10.3757,
      "step": 1
    },
    {
      "epoch": 0.0023988005997001498,
      "grad_norm": 0.1320822536945343,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 10.3748,
      "step": 2
    },
    {
      "epoch": 0.003598200899550225,
      "grad_norm": 0.13625332713127136,
      "learning_rate": 3e-06,
      "loss": 10.3734,
      "step": 3
    },
    {
      "epoch": 0.0047976011994002995,
      "grad_norm": 0.1434432715177536,
      "learning_rate": 4.000000000000001e-06,
      "loss": 10.3739,
      "step": 4
    },
    {
      "epoch": 0.005997001499250375,
      "grad_norm": 0.14035488665103912,
      "learning_rate": 5e-06,
      "loss": 10.3765,
      "step": 5
    },
    {
      "epoch": 0.00719640179910045,
      "grad_norm": 0.14915668964385986,
      "learning_rate": 6e-06,
      "loss": 10.3743,
      "step": 6
    },
    {
      "epoch": 0.008395802098950524,
      "grad_norm": 0.15720613300800323,
      "learning_rate": 7.000000000000001e-06,
      "loss": 10.3724,
      "step": 7
    },
    {
      "epoch": 0.009595202398800599,
      "grad_norm": 0.15728804469108582,
      "learning_rate": 8.000000000000001e-06,
      "loss": 10.3741,
      "step": 8
    },
    {
      "epoch": 0.010794602698650674,
      "grad_norm": 0.16915467381477356,
      "learning_rate": 9e-06,
      "loss": 10.3754,
      "step": 9
    },
    {
      "epoch": 0.01199400299850075,
      "grad_norm": 0.17764562368392944,
      "learning_rate": 1e-05,
      "loss": 10.3721,
      "step": 10
    },
    {
      "epoch": 0.013193403298350824,
      "grad_norm": 0.19067725539207458,
      "learning_rate": 1.1000000000000001e-05,
      "loss": 10.3733,
      "step": 11
    },
    {
      "epoch": 0.0143928035982009,
      "grad_norm": 0.20686227083206177,
      "learning_rate": 1.2e-05,
      "loss": 10.3718,
      "step": 12
    },
    {
      "epoch": 0.015592203898050975,
      "grad_norm": 0.13864944875240326,
      "learning_rate": 1.3000000000000001e-05,
      "loss": 10.3757,
      "step": 13
    },
    {
      "epoch": 0.016791604197901048,
      "grad_norm": 0.13187511265277863,
      "learning_rate": 1.4000000000000001e-05,
      "loss": 10.3746,
      "step": 14
    },
    {
      "epoch": 0.017991004497751123,
      "grad_norm": 0.13840411603450775,
      "learning_rate": 1.5e-05,
      "loss": 10.3745,
      "step": 15
    },
    {
      "epoch": 0.019190404797601198,
      "grad_norm": 0.13771533966064453,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 10.3757,
      "step": 16
    },
    {
      "epoch": 0.020389805097451273,
      "grad_norm": 0.144022136926651,
      "learning_rate": 1.7000000000000003e-05,
      "loss": 10.3742,
      "step": 17
    },
    {
      "epoch": 0.02158920539730135,
      "grad_norm": 0.14401449263095856,
      "learning_rate": 1.8e-05,
      "loss": 10.3742,
      "step": 18
    },
    {
      "epoch": 0.022788605697151423,
      "grad_norm": 0.15697641670703888,
      "learning_rate": 1.9e-05,
      "loss": 10.3749,
      "step": 19
    },
    {
      "epoch": 0.0239880059970015,
      "grad_norm": 0.15877871215343475,
      "learning_rate": 2e-05,
      "loss": 10.3725,
      "step": 20
    },
    {
      "epoch": 0.025187406296851574,
      "grad_norm": 0.16563433408737183,
      "learning_rate": 2.1e-05,
      "loss": 10.3743,
      "step": 21
    },
    {
      "epoch": 0.02638680659670165,
      "grad_norm": 0.17025835812091827,
      "learning_rate": 2.2000000000000003e-05,
      "loss": 10.3738,
      "step": 22
    },
    {
      "epoch": 0.027586206896551724,
      "grad_norm": 0.17753835022449493,
      "learning_rate": 2.3000000000000003e-05,
      "loss": 10.3732,
      "step": 23
    },
    {
      "epoch": 0.0287856071964018,
      "grad_norm": 0.19433385133743286,
      "learning_rate": 2.4e-05,
      "loss": 10.373,
      "step": 24
    },
    {
      "epoch": 0.029985007496251874,
      "grad_norm": 0.2346523553133011,
      "learning_rate": 2.5e-05,
      "loss": 10.3702,
      "step": 25
    },
    {
      "epoch": 0.03118440779610195,
      "grad_norm": 0.11483809351921082,
      "learning_rate": 2.6000000000000002e-05,
      "loss": 10.3731,
      "step": 26
    },
    {
      "epoch": 0.032383808095952024,
      "grad_norm": 0.12950515747070312,
      "learning_rate": 2.7000000000000002e-05,
      "loss": 10.3734,
      "step": 27
    },
    {
      "epoch": 0.033583208395802096,
      "grad_norm": 0.13586033880710602,
      "learning_rate": 2.8000000000000003e-05,
      "loss": 10.3725,
      "step": 28
    },
    {
      "epoch": 0.034782608695652174,
      "grad_norm": 0.14205914735794067,
      "learning_rate": 2.9e-05,
      "loss": 10.375,
      "step": 29
    },
    {
      "epoch": 0.035982008995502246,
      "grad_norm": 0.14829206466674805,
      "learning_rate": 3e-05,
      "loss": 10.3727,
      "step": 30
    },
    {
      "epoch": 0.037181409295352325,
      "grad_norm": 0.15150436758995056,
      "learning_rate": 3.1e-05,
      "loss": 10.3721,
      "step": 31
    },
    {
      "epoch": 0.038380809595202396,
      "grad_norm": 0.1555749624967575,
      "learning_rate": 3.2000000000000005e-05,
      "loss": 10.3742,
      "step": 32
    },
    {
      "epoch": 0.039580209895052475,
      "grad_norm": 0.1620868444442749,
      "learning_rate": 3.3e-05,
      "loss": 10.3729,
      "step": 33
    },
    {
      "epoch": 0.040779610194902546,
      "grad_norm": 0.1725643426179886,
      "learning_rate": 3.4000000000000007e-05,
      "loss": 10.3718,
      "step": 34
    },
    {
      "epoch": 0.041979010494752625,
      "grad_norm": 0.18715962767601013,
      "learning_rate": 3.5e-05,
      "loss": 10.371,
      "step": 35
    },
    {
      "epoch": 0.0431784107946027,
      "grad_norm": 0.19183875620365143,
      "learning_rate": 3.6e-05,
      "loss": 10.3718,
      "step": 36
    },
    {
      "epoch": 0.044377811094452775,
      "grad_norm": 0.21521849930286407,
      "learning_rate": 3.7e-05,
      "loss": 10.3719,
      "step": 37
    },
    {
      "epoch": 0.04557721139430285,
      "grad_norm": 0.1310717910528183,
      "learning_rate": 3.8e-05,
      "loss": 10.3731,
      "step": 38
    },
    {
      "epoch": 0.046776611694152925,
      "grad_norm": 0.12645111978054047,
      "learning_rate": 3.9000000000000006e-05,
      "loss": 10.3728,
      "step": 39
    },
    {
      "epoch": 0.047976011994003,
      "grad_norm": 0.1375029981136322,
      "learning_rate": 4e-05,
      "loss": 10.3719,
      "step": 40
    },
    {
      "epoch": 0.049175412293853075,
      "grad_norm": 0.1415010392665863,
      "learning_rate": 4.1e-05,
      "loss": 10.3723,
      "step": 41
    },
    {
      "epoch": 0.05037481259370315,
      "grad_norm": 0.1474965512752533,
      "learning_rate": 4.2e-05,
      "loss": 10.3708,
      "step": 42
    },
    {
      "epoch": 0.051574212893553226,
      "grad_norm": 0.1504737138748169,
      "learning_rate": 4.3e-05,
      "loss": 10.3718,
      "step": 43
    },
    {
      "epoch": 0.0527736131934033,
      "grad_norm": 0.15808707475662231,
      "learning_rate": 4.4000000000000006e-05,
      "loss": 10.3712,
      "step": 44
    },
    {
      "epoch": 0.053973013493253376,
      "grad_norm": 0.16332747042179108,
      "learning_rate": 4.5e-05,
      "loss": 10.3695,
      "step": 45
    },
    {
      "epoch": 0.05517241379310345,
      "grad_norm": 0.17212961614131927,
      "learning_rate": 4.600000000000001e-05,
      "loss": 10.3692,
      "step": 46
    },
    {
      "epoch": 0.056371814092953526,
      "grad_norm": 0.17262108623981476,
      "learning_rate": 4.7e-05,
      "loss": 10.3705,
      "step": 47
    },
    {
      "epoch": 0.0575712143928036,
      "grad_norm": 0.1849043071269989,
      "learning_rate": 4.8e-05,
      "loss": 10.3671,
      "step": 48
    },
    {
      "epoch": 0.058770614692653676,
      "grad_norm": 0.19811047613620758,
      "learning_rate": 4.9e-05,
      "loss": 10.3671,
      "step": 49
    },
    {
      "epoch": 0.05997001499250375,
      "grad_norm": 0.2461751103401184,
      "learning_rate": 5e-05,
      "loss": 10.3667,
      "step": 50
    },
    {
      "epoch": 0.061169415292353826,
      "grad_norm": 0.12041845172643661,
      "learning_rate": 5.1000000000000006e-05,
      "loss": 10.371,
      "step": 51
    },
    {
      "epoch": 0.0623688155922039,
      "grad_norm": 0.12933555245399475,
      "learning_rate": 5.2000000000000004e-05,
      "loss": 10.3694,
      "step": 52
    },
    {
      "epoch": 0.06356821589205397,
      "grad_norm": 0.14095033705234528,
      "learning_rate": 5.300000000000001e-05,
      "loss": 10.3693,
      "step": 53
    },
    {
      "epoch": 0.06476761619190405,
      "grad_norm": 0.14537349343299866,
      "learning_rate": 5.4000000000000005e-05,
      "loss": 10.3671,
      "step": 54
    },
    {
      "epoch": 0.06596701649175413,
      "grad_norm": 0.1486896276473999,
      "learning_rate": 5.500000000000001e-05,
      "loss": 10.3671,
      "step": 55
    },
    {
      "epoch": 0.06716641679160419,
      "grad_norm": 0.15299569070339203,
      "learning_rate": 5.6000000000000006e-05,
      "loss": 10.3668,
      "step": 56
    },
    {
      "epoch": 0.06836581709145427,
      "grad_norm": 0.16295485198497772,
      "learning_rate": 5.6999999999999996e-05,
      "loss": 10.3653,
      "step": 57
    },
    {
      "epoch": 0.06956521739130435,
      "grad_norm": 0.16358605027198792,
      "learning_rate": 5.8e-05,
      "loss": 10.3661,
      "step": 58
    },
    {
      "epoch": 0.07076461769115443,
      "grad_norm": 0.17602834105491638,
      "learning_rate": 5.9e-05,
      "loss": 10.3633,
      "step": 59
    },
    {
      "epoch": 0.07196401799100449,
      "grad_norm": 0.18307778239250183,
      "learning_rate": 6e-05,
      "loss": 10.3636,
      "step": 60
    },
    {
      "epoch": 0.07316341829085457,
      "grad_norm": 0.19053678214550018,
      "learning_rate": 6.1e-05,
      "loss": 10.3633,
      "step": 61
    },
    {
      "epoch": 0.07436281859070465,
      "grad_norm": 0.20003724098205566,
      "learning_rate": 6.2e-05,
      "loss": 10.3622,
      "step": 62
    },
    {
      "epoch": 0.07556221889055473,
      "grad_norm": 0.15672719478607178,
      "learning_rate": 6.3e-05,
      "loss": 10.3644,
      "step": 63
    },
    {
      "epoch": 0.07676161919040479,
      "grad_norm": 0.14309260249137878,
      "learning_rate": 6.400000000000001e-05,
      "loss": 10.3649,
      "step": 64
    },
    {
      "epoch": 0.07796101949025487,
      "grad_norm": 0.1525091975927353,
      "learning_rate": 6.500000000000001e-05,
      "loss": 10.3646,
      "step": 65
    },
    {
      "epoch": 0.07916041979010495,
      "grad_norm": 0.15806975960731506,
      "learning_rate": 6.6e-05,
      "loss": 10.3623,
      "step": 66
    },
    {
      "epoch": 0.08035982008995503,
      "grad_norm": 0.16560155153274536,
      "learning_rate": 6.7e-05,
      "loss": 10.3609,
      "step": 67
    },
    {
      "epoch": 0.08155922038980509,
      "grad_norm": 0.16659671068191528,
      "learning_rate": 6.800000000000001e-05,
      "loss": 10.3601,
      "step": 68
    },
    {
      "epoch": 0.08275862068965517,
      "grad_norm": 0.18391086161136627,
      "learning_rate": 6.9e-05,
      "loss": 10.3617,
      "step": 69
    },
    {
      "epoch": 0.08395802098950525,
      "grad_norm": 0.1868380904197693,
      "learning_rate": 7e-05,
      "loss": 10.3576,
      "step": 70
    },
    {
      "epoch": 0.08515742128935533,
      "grad_norm": 0.20636723935604095,
      "learning_rate": 7.1e-05,
      "loss": 10.357,
      "step": 71
    },
    {
      "epoch": 0.0863568215892054,
      "grad_norm": 0.2090313583612442,
      "learning_rate": 7.2e-05,
      "loss": 10.3535,
      "step": 72
    },
    {
      "epoch": 0.08755622188905547,
      "grad_norm": 0.23220857977867126,
      "learning_rate": 7.3e-05,
      "loss": 10.355,
      "step": 73
    },
    {
      "epoch": 0.08875562218890555,
      "grad_norm": 0.23836413025856018,
      "learning_rate": 7.4e-05,
      "loss": 10.3502,
      "step": 74
    },
    {
      "epoch": 0.08995502248875563,
      "grad_norm": 0.2619498670101166,
      "learning_rate": 7.500000000000001e-05,
      "loss": 10.3485,
      "step": 75
    },
    {
      "epoch": 0.0911544227886057,
      "grad_norm": 0.20814213156700134,
      "learning_rate": 7.6e-05,
      "loss": 10.3572,
      "step": 76
    },
    {
      "epoch": 0.09235382308845577,
      "grad_norm": 0.2424175888299942,
      "learning_rate": 7.7e-05,
      "loss": 10.3524,
      "step": 77
    },
    {
      "epoch": 0.09355322338830585,
      "grad_norm": 0.2402586191892624,
      "learning_rate": 7.800000000000001e-05,
      "loss": 10.3501,
      "step": 78
    },
    {
      "epoch": 0.09475262368815592,
      "grad_norm": 0.27135393023490906,
      "learning_rate": 7.900000000000001e-05,
      "loss": 10.3486,
      "step": 79
    },
    {
      "epoch": 0.095952023988006,
      "grad_norm": 0.279787540435791,
      "learning_rate": 8e-05,
      "loss": 10.3447,
      "step": 80
    },
    {
      "epoch": 0.09715142428785607,
      "grad_norm": 0.28797608613967896,
      "learning_rate": 8.1e-05,
      "loss": 10.3438,
      "step": 81
    },
    {
      "epoch": 0.09835082458770615,
      "grad_norm": 0.3241998851299286,
      "learning_rate": 8.2e-05,
      "loss": 10.3392,
      "step": 82
    },
    {
      "epoch": 0.09955022488755622,
      "grad_norm": 0.3485376238822937,
      "learning_rate": 8.3e-05,
      "loss": 10.3364,
      "step": 83
    },
    {
      "epoch": 0.1007496251874063,
      "grad_norm": 0.3528934419155121,
      "learning_rate": 8.4e-05,
      "loss": 10.3339,
      "step": 84
    },
    {
      "epoch": 0.10194902548725637,
      "grad_norm": 0.361213743686676,
      "learning_rate": 8.5e-05,
      "loss": 10.3271,
      "step": 85
    },
    {
      "epoch": 0.10314842578710645,
      "grad_norm": 0.3762340843677521,
      "learning_rate": 8.6e-05,
      "loss": 10.3242,
      "step": 86
    },
    {
      "epoch": 0.10434782608695652,
      "grad_norm": 0.3962249159812927,
      "learning_rate": 8.7e-05,
      "loss": 10.3198,
      "step": 87
    },
    {
      "epoch": 0.1055472263868066,
      "grad_norm": 0.4154474139213562,
      "learning_rate": 8.800000000000001e-05,
      "loss": 10.3241,
      "step": 88
    },
    {
      "epoch": 0.10674662668665667,
      "grad_norm": 0.42189449071884155,
      "learning_rate": 8.900000000000001e-05,
      "loss": 10.319,
      "step": 89
    },
    {
      "epoch": 0.10794602698650675,
      "grad_norm": 0.3983931541442871,
      "learning_rate": 9e-05,
      "loss": 10.3142,
      "step": 90
    },
    {
      "epoch": 0.10914542728635682,
      "grad_norm": 0.39982685446739197,
      "learning_rate": 9.1e-05,
      "loss": 10.3076,
      "step": 91
    },
    {
      "epoch": 0.1103448275862069,
      "grad_norm": 0.3935539424419403,
      "learning_rate": 9.200000000000001e-05,
      "loss": 10.3047,
      "step": 92
    },
    {
      "epoch": 0.11154422788605697,
      "grad_norm": 0.3751447796821594,
      "learning_rate": 9.300000000000001e-05,
      "loss": 10.2953,
      "step": 93
    },
    {
      "epoch": 0.11274362818590705,
      "grad_norm": 0.3766322135925293,
      "learning_rate": 9.4e-05,
      "loss": 10.2932,
      "step": 94
    },
    {
      "epoch": 0.11394302848575712,
      "grad_norm": 0.3571270704269409,
      "learning_rate": 9.5e-05,
      "loss": 10.2875,
      "step": 95
    },
    {
      "epoch": 0.1151424287856072,
      "grad_norm": 0.34838995337486267,
      "learning_rate": 9.6e-05,
      "loss": 10.2812,
      "step": 96
    },
    {
      "epoch": 0.11634182908545727,
      "grad_norm": 0.33183571696281433,
      "learning_rate": 9.7e-05,
      "loss": 10.276,
      "step": 97
    },
    {
      "epoch": 0.11754122938530735,
      "grad_norm": 0.3224335312843323,
      "learning_rate": 9.8e-05,
      "loss": 10.2705,
      "step": 98
    },
    {
      "epoch": 0.11874062968515742,
      "grad_norm": 0.33488717675209045,
      "learning_rate": 9.900000000000001e-05,
      "loss": 10.2681,
      "step": 99
    },
    {
      "epoch": 0.1199400299850075,
      "grad_norm": 0.3330170810222626,
      "learning_rate": 0.0001,
      "loss": 10.2595,
      "step": 100
    },
    {
      "epoch": 0.12113943028485757,
      "grad_norm": 0.29305723309516907,
      "learning_rate": 9.999954076906038e-05,
      "loss": 10.2593,
      "step": 101
    },
    {
      "epoch": 0.12233883058470765,
      "grad_norm": 0.2937624454498291,
      "learning_rate": 9.999816308467719e-05,
      "loss": 10.2547,
      "step": 102
    },
    {
      "epoch": 0.12353823088455772,
      "grad_norm": 0.29269203543663025,
      "learning_rate": 9.999586697215748e-05,
      "loss": 10.2482,
      "step": 103
    },
    {
      "epoch": 0.1247376311844078,
      "grad_norm": 0.2853996157646179,
      "learning_rate": 9.999265247367908e-05,
      "loss": 10.2453,
      "step": 104
    },
    {
      "epoch": 0.12593703148425786,
      "grad_norm": 0.28077051043510437,
      "learning_rate": 9.998851964828986e-05,
      "loss": 10.2412,
      "step": 105
    },
    {
      "epoch": 0.12713643178410794,
      "grad_norm": 0.27728626132011414,
      "learning_rate": 9.99834685719067e-05,
      "loss": 10.2357,
      "step": 106
    },
    {
      "epoch": 0.12833583208395802,
      "grad_norm": 0.28931453824043274,
      "learning_rate": 9.997749933731398e-05,
      "loss": 10.2308,
      "step": 107
    },
    {
      "epoch": 0.1295352323838081,
      "grad_norm": 0.3001117408275604,
      "learning_rate": 9.997061205416203e-05,
      "loss": 10.2248,
      "step": 108
    },
    {
      "epoch": 0.13073463268365818,
      "grad_norm": 0.33052486181259155,
      "learning_rate": 9.996280684896495e-05,
      "loss": 10.2211,
      "step": 109
    },
    {
      "epoch": 0.13193403298350825,
      "grad_norm": 0.3081456124782562,
      "learning_rate": 9.995408386509846e-05,
      "loss": 10.2144,
      "step": 110
    },
    {
      "epoch": 0.13313343328335833,
      "grad_norm": 0.29855111241340637,
      "learning_rate": 9.994444326279708e-05,
      "loss": 10.2106,
      "step": 111
    },
    {
      "epoch": 0.13433283358320838,
      "grad_norm": 0.31099098920822144,
      "learning_rate": 9.993388521915134e-05,
      "loss": 10.208,
      "step": 112
    },
    {
      "epoch": 0.13553223388305846,
      "grad_norm": 0.2797197103500366,
      "learning_rate": 9.992240992810444e-05,
      "loss": 10.2032,
      "step": 113
    },
    {
      "epoch": 0.13673163418290854,
      "grad_norm": 0.2696700692176819,
      "learning_rate": 9.991001760044875e-05,
      "loss": 10.1995,
      "step": 114
    },
    {
      "epoch": 0.13793103448275862,
      "grad_norm": 0.2778705954551697,
      "learning_rate": 9.989670846382188e-05,
      "loss": 10.1948,
      "step": 115
    },
    {
      "epoch": 0.1391304347826087,
      "grad_norm": 0.28072160482406616,
      "learning_rate": 9.988248276270248e-05,
      "loss": 10.1918,
      "step": 116
    },
    {
      "epoch": 0.14032983508245878,
      "grad_norm": 0.2873566150665283,
      "learning_rate": 9.98673407584059e-05,
      "loss": 10.1859,
      "step": 117
    },
    {
      "epoch": 0.14152923538230885,
      "grad_norm": 0.2754334807395935,
      "learning_rate": 9.985128272907918e-05,
      "loss": 10.1841,
      "step": 118
    },
    {
      "epoch": 0.14272863568215893,
      "grad_norm": 0.27839502692222595,
      "learning_rate": 9.983430896969605e-05,
      "loss": 10.1797,
      "step": 119
    },
    {
      "epoch": 0.14392803598200898,
      "grad_norm": 0.28016844391822815,
      "learning_rate": 9.981641979205158e-05,
      "loss": 10.1762,
      "step": 120
    },
    {
      "epoch": 0.14512743628185906,
      "grad_norm": 0.2875809669494629,
      "learning_rate": 9.979761552475628e-05,
      "loss": 10.169,
      "step": 121
    },
    {
      "epoch": 0.14632683658170914,
      "grad_norm": 0.29734012484550476,
      "learning_rate": 9.977789651323023e-05,
      "loss": 10.1667,
      "step": 122
    },
    {
      "epoch": 0.14752623688155922,
      "grad_norm": 0.34327661991119385,
      "learning_rate": 9.975726311969664e-05,
      "loss": 10.1617,
      "step": 123
    },
    {
      "epoch": 0.1487256371814093,
      "grad_norm": 0.32941052317619324,
      "learning_rate": 9.973571572317519e-05,
      "loss": 10.1619,
      "step": 124
    },
    {
      "epoch": 0.14992503748125938,
      "grad_norm": 0.3153591752052307,
      "learning_rate": 9.971325471947517e-05,
      "loss": 10.1544,
      "step": 125
    },
    {
      "epoch": 0.15112443778110946,
      "grad_norm": 0.26798173785209656,
      "learning_rate": 9.968988052118804e-05,
      "loss": 10.1517,
      "step": 126
    },
    {
      "epoch": 0.15232383808095953,
      "grad_norm": 0.2733558714389801,
      "learning_rate": 9.966559355768005e-05,
      "loss": 10.1469,
      "step": 127
    },
    {
      "epoch": 0.15352323838080958,
      "grad_norm": 0.275776207447052,
      "learning_rate": 9.964039427508418e-05,
      "loss": 10.1425,
      "step": 128
    },
    {
      "epoch": 0.15472263868065966,
      "grad_norm": 0.2864780128002167,
      "learning_rate": 9.961428313629203e-05,
      "loss": 10.1389,
      "step": 129
    },
    {
      "epoch": 0.15592203898050974,
      "grad_norm": 0.2879246473312378,
      "learning_rate": 9.958726062094534e-05,
      "loss": 10.1327,
      "step": 130
    },
    {
      "epoch": 0.15712143928035982,
      "grad_norm": 0.28718408942222595,
      "learning_rate": 9.955932722542708e-05,
      "loss": 10.1328,
      "step": 131
    },
    {
      "epoch": 0.1583208395802099,
      "grad_norm": 0.2976157069206238,
      "learning_rate": 9.953048346285245e-05,
      "loss": 10.1255,
      "step": 132
    },
    {
      "epoch": 0.15952023988005998,
      "grad_norm": 0.2953495681285858,
      "learning_rate": 9.950072986305939e-05,
      "loss": 10.1214,
      "step": 133
    },
    {
      "epoch": 0.16071964017991006,
      "grad_norm": 0.2883056700229645,
      "learning_rate": 9.947006697259882e-05,
      "loss": 10.1202,
      "step": 134
    },
    {
      "epoch": 0.1619190404797601,
      "grad_norm": 0.3668458163738251,
      "learning_rate": 9.943849535472467e-05,
      "loss": 10.1184,
      "step": 135
    },
    {
      "epoch": 0.16311844077961019,
      "grad_norm": 0.3660871982574463,
      "learning_rate": 9.940601558938348e-05,
      "loss": 10.1143,
      "step": 136
    },
    {
      "epoch": 0.16431784107946026,
      "grad_norm": 0.32822495698928833,
      "learning_rate": 9.937262827320379e-05,
      "loss": 10.1094,
      "step": 137
    },
    {
      "epoch": 0.16551724137931034,
      "grad_norm": 0.2712533175945282,
      "learning_rate": 9.933833401948513e-05,
      "loss": 10.1083,
      "step": 138
    },
    {
      "epoch": 0.16671664167916042,
      "grad_norm": 0.27984705567359924,
      "learning_rate": 9.930313345818682e-05,
      "loss": 10.1001,
      "step": 139
    },
    {
      "epoch": 0.1679160419790105,
      "grad_norm": 0.2825195789337158,
      "learning_rate": 9.92670272359163e-05,
      "loss": 10.0952,
      "step": 140
    },
    {
      "epoch": 0.16911544227886058,
      "grad_norm": 0.27770835161209106,
      "learning_rate": 9.923001601591738e-05,
      "loss": 10.0949,
      "step": 141
    },
    {
      "epoch": 0.17031484257871066,
      "grad_norm": 0.2809567451477051,
      "learning_rate": 9.919210047805792e-05,
      "loss": 10.0913,
      "step": 142
    },
    {
      "epoch": 0.1715142428785607,
      "grad_norm": 0.2936723530292511,
      "learning_rate": 9.915328131881745e-05,
      "loss": 10.0867,
      "step": 143
    },
    {
      "epoch": 0.1727136431784108,
      "grad_norm": 0.29460567235946655,
      "learning_rate": 9.911355925127433e-05,
      "loss": 10.0855,
      "step": 144
    },
    {
      "epoch": 0.17391304347826086,
      "grad_norm": 0.2977669835090637,
      "learning_rate": 9.907293500509268e-05,
      "loss": 10.08,
      "step": 145
    },
    {
      "epoch": 0.17511244377811094,
      "grad_norm": 0.3061806559562683,
      "learning_rate": 9.903140932650891e-05,
      "loss": 10.0707,
      "step": 146
    },
    {
      "epoch": 0.17631184407796102,
      "grad_norm": 0.2858673632144928,
      "learning_rate": 9.898898297831807e-05,
      "loss": 10.072,
      "step": 147
    },
    {
      "epoch": 0.1775112443778111,
      "grad_norm": 0.3454626202583313,
      "learning_rate": 9.894565673985985e-05,
      "loss": 10.071,
      "step": 148
    },
    {
      "epoch": 0.17871064467766118,
      "grad_norm": 0.5128405690193176,
      "learning_rate": 9.890143140700419e-05,
      "loss": 10.0703,
      "step": 149
    },
    {
      "epoch": 0.17991004497751126,
      "grad_norm": 0.5272448658943176,
      "learning_rate": 9.885630779213677e-05,
      "loss": 10.0602,
      "step": 150
    },
    {
      "epoch": 0.1811094452773613,
      "grad_norm": 0.27561256289482117,
      "learning_rate": 9.881028672414396e-05,
      "loss": 10.0577,
      "step": 151
    },
    {
      "epoch": 0.1823088455772114,
      "grad_norm": 0.2745487093925476,
      "learning_rate": 9.876336904839772e-05,
      "loss": 10.0557,
      "step": 152
    },
    {
      "epoch": 0.18350824587706147,
      "grad_norm": 0.2808258831501007,
      "learning_rate": 9.871555562673995e-05,
      "loss": 10.05,
      "step": 153
    },
    {
      "epoch": 0.18470764617691154,
      "grad_norm": 0.28502652049064636,
      "learning_rate": 9.866684733746679e-05,
      "loss": 10.0468,
      "step": 154
    },
    {
      "epoch": 0.18590704647676162,
      "grad_norm": 0.28735095262527466,
      "learning_rate": 9.861724507531233e-05,
      "loss": 10.0436,
      "step": 155
    },
    {
      "epoch": 0.1871064467766117,
      "grad_norm": 0.2824231684207916,
      "learning_rate": 9.856674975143236e-05,
      "loss": 10.0452,
      "step": 156
    },
    {
      "epoch": 0.18830584707646178,
      "grad_norm": 0.2854205369949341,
      "learning_rate": 9.851536229338747e-05,
      "loss": 10.0425,
      "step": 157
    },
    {
      "epoch": 0.18950524737631183,
      "grad_norm": 0.3004373610019684,
      "learning_rate": 9.846308364512606e-05,
      "loss": 10.0368,
      "step": 158
    },
    {
      "epoch": 0.1907046476761619,
      "grad_norm": 0.3045557141304016,
      "learning_rate": 9.840991476696706e-05,
      "loss": 10.0328,
      "step": 159
    },
    {
      "epoch": 0.191904047976012,
      "grad_norm": 0.2962295413017273,
      "learning_rate": 9.835585663558221e-05,
      "loss": 10.0301,
      "step": 160
    },
    {
      "epoch": 0.19310344827586207,
      "grad_norm": 0.31199753284454346,
      "learning_rate": 9.830091024397818e-05,
      "loss": 10.0286,
      "step": 161
    },
    {
      "epoch": 0.19430284857571214,
      "grad_norm": 0.33795246481895447,
      "learning_rate": 9.82450766014783e-05,
      "loss": 10.0224,
      "step": 162
    },
    {
      "epoch": 0.19550224887556222,
      "grad_norm": 0.28454065322875977,
      "learning_rate": 9.818835673370401e-05,
      "loss": 10.0172,
      "step": 163
    },
    {
      "epoch": 0.1967016491754123,
      "grad_norm": 0.2783207595348358,
      "learning_rate": 9.813075168255601e-05,
      "loss": 10.0135,
      "step": 164
    },
    {
      "epoch": 0.19790104947526238,
      "grad_norm": 0.2809619605541229,
      "learning_rate": 9.807226250619521e-05,
      "loss": 10.0112,
      "step": 165
    },
    {
      "epoch": 0.19910044977511243,
      "grad_norm": 0.2793205976486206,
      "learning_rate": 9.801289027902316e-05,
      "loss": 10.0068,
      "step": 166
    },
    {
      "epoch": 0.2002998500749625,
      "grad_norm": 0.2848748564720154,
      "learning_rate": 9.795263609166243e-05,
      "loss": 10.0044,
      "step": 167
    },
    {
      "epoch": 0.2014992503748126,
      "grad_norm": 0.3925122618675232,
      "learning_rate": 9.789150105093647e-05,
      "loss": 10.0025,
      "step": 168
    },
    {
      "epoch": 0.20269865067466267,
      "grad_norm": 0.48036912083625793,
      "learning_rate": 9.78294862798494e-05,
      "loss": 9.9991,
      "step": 169
    },
    {
      "epoch": 0.20389805097451275,
      "grad_norm": 0.32086822390556335,
      "learning_rate": 9.776659291756528e-05,
      "loss": 9.9968,
      "step": 170
    },
    {
      "epoch": 0.20509745127436282,
      "grad_norm": 0.28707683086395264,
      "learning_rate": 9.770282211938721e-05,
      "loss": 9.9895,
      "step": 171
    },
    {
      "epoch": 0.2062968515742129,
      "grad_norm": 0.2987452745437622,
      "learning_rate": 9.763817505673613e-05,
      "loss": 9.9897,
      "step": 172
    },
    {
      "epoch": 0.20749625187406298,
      "grad_norm": 0.3029066324234009,
      "learning_rate": 9.75726529171293e-05,
      "loss": 9.9879,
      "step": 173
    },
    {
      "epoch": 0.20869565217391303,
      "grad_norm": 0.321458637714386,
      "learning_rate": 9.750625690415848e-05,
      "loss": 9.9815,
      "step": 174
    },
    {
      "epoch": 0.2098950524737631,
      "grad_norm": 0.35623157024383545,
      "learning_rate": 9.74389882374678e-05,
      "loss": 9.9831,
      "step": 175
    },
    {
      "epoch": 0.2110944527736132,
      "grad_norm": 0.27146583795547485,
      "learning_rate": 9.737084815273137e-05,
      "loss": 9.9741,
      "step": 176
    },
    {
      "epoch": 0.21229385307346327,
      "grad_norm": 0.2866266071796417,
      "learning_rate": 9.730183790163062e-05,
      "loss": 9.9692,
      "step": 177
    },
    {
      "epoch": 0.21349325337331335,
      "grad_norm": 0.28268909454345703,
      "learning_rate": 9.72319587518312e-05,
      "loss": 9.9681,
      "step": 178
    },
    {
      "epoch": 0.21469265367316342,
      "grad_norm": 0.2824539244174957,
      "learning_rate": 9.716121198695986e-05,
      "loss": 9.9671,
      "step": 179
    },
    {
      "epoch": 0.2158920539730135,
      "grad_norm": 0.2851243317127228,
      "learning_rate": 9.708959890658073e-05,
      "loss": 9.9606,
      "step": 180
    },
    {
      "epoch": 0.21709145427286355,
      "grad_norm": 0.28162598609924316,
      "learning_rate": 9.701712082617149e-05,
      "loss": 9.9617,
      "step": 181
    },
    {
      "epoch": 0.21829085457271363,
      "grad_norm": 0.28222355246543884,
      "learning_rate": 9.69437790770992e-05,
      "loss": 9.9595,
      "step": 182
    },
    {
      "epoch": 0.2194902548725637,
      "grad_norm": 0.32921475172042847,
      "learning_rate": 9.68695750065959e-05,
      "loss": 9.9553,
      "step": 183
    },
    {
      "epoch": 0.2206896551724138,
      "grad_norm": 0.4057653248310089,
      "learning_rate": 9.679450997773378e-05,
      "loss": 9.9576,
      "step": 184
    },
    {
      "epoch": 0.22188905547226387,
      "grad_norm": 0.37460216879844666,
      "learning_rate": 9.67185853694002e-05,
      "loss": 9.9495,
      "step": 185
    },
    {
      "epoch": 0.22308845577211395,
      "grad_norm": 0.28542500734329224,
      "learning_rate": 9.66418025762723e-05,
      "loss": 9.9535,
      "step": 186
    },
    {
      "epoch": 0.22428785607196403,
      "grad_norm": 0.3168298304080963,
      "learning_rate": 9.656416300879148e-05,
      "loss": 9.9461,
      "step": 187
    },
    {
      "epoch": 0.2254872563718141,
      "grad_norm": 0.2682456076145172,
      "learning_rate": 9.648566809313738e-05,
      "loss": 9.941,
      "step": 188
    },
    {
      "epoch": 0.22668665667166416,
      "grad_norm": 0.27266478538513184,
      "learning_rate": 9.640631927120177e-05,
      "loss": 9.9355,
      "step": 189
    },
    {
      "epoch": 0.22788605697151423,
      "grad_norm": 0.2777270972728729,
      "learning_rate": 9.632611800056201e-05,
      "loss": 9.9321,
      "step": 190
    },
    {
      "epoch": 0.2290854572713643,
      "grad_norm": 0.2846197485923767,
      "learning_rate": 9.624506575445429e-05,
      "loss": 9.93,
      "step": 191
    },
    {
      "epoch": 0.2302848575712144,
      "grad_norm": 0.28478533029556274,
      "learning_rate": 9.616316402174656e-05,
      "loss": 9.9284,
      "step": 192
    },
    {
      "epoch": 0.23148425787106447,
      "grad_norm": 0.2874702215194702,
      "learning_rate": 9.608041430691126e-05,
      "loss": 9.9276,
      "step": 193
    },
    {
      "epoch": 0.23268365817091455,
      "grad_norm": 0.29689356684684753,
      "learning_rate": 9.59968181299975e-05,
      "loss": 9.9224,
      "step": 194
    },
    {
      "epoch": 0.23388305847076463,
      "grad_norm": 0.2949802577495575,
      "learning_rate": 9.591237702660335e-05,
      "loss": 9.9178,
      "step": 195
    },
    {
      "epoch": 0.2350824587706147,
      "grad_norm": 0.29631638526916504,
      "learning_rate": 9.582709254784748e-05,
      "loss": 9.9202,
      "step": 196
    },
    {
      "epoch": 0.23628185907046476,
      "grad_norm": 0.29446399211883545,
      "learning_rate": 9.574096626034077e-05,
      "loss": 9.9169,
      "step": 197
    },
    {
      "epoch": 0.23748125937031483,
      "grad_norm": 0.29663321375846863,
      "learning_rate": 9.565399974615743e-05,
      "loss": 9.9164,
      "step": 198
    },
    {
      "epoch": 0.2386806596701649,
      "grad_norm": 0.4233105182647705,
      "learning_rate": 9.556619460280605e-05,
      "loss": 9.9167,
      "step": 199
    },
    {
      "epoch": 0.239880059970015,
      "grad_norm": 0.902298092842102,
      "learning_rate": 9.547755244320012e-05,
      "loss": 9.9114,
      "step": 200
    },
    {
      "epoch": 0.24107946026986507,
      "grad_norm": 0.27147176861763,
      "learning_rate": 9.538807489562859e-05,
      "loss": 9.9017,
      "step": 201
    },
    {
      "epoch": 0.24227886056971515,
      "grad_norm": 0.2803528308868408,
      "learning_rate": 9.529776360372575e-05,
      "loss": 9.8995,
      "step": 202
    },
    {
      "epoch": 0.24347826086956523,
      "grad_norm": 0.26581478118896484,
      "learning_rate": 9.520662022644119e-05,
      "loss": 9.9054,
      "step": 203
    },
    {
      "epoch": 0.2446776611694153,
      "grad_norm": 0.27326396107673645,
      "learning_rate": 9.511464643800925e-05,
      "loss": 9.8952,
      "step": 204
    },
    {
      "epoch": 0.24587706146926536,
      "grad_norm": 0.277313768863678,
      "learning_rate": 9.502184392791834e-05,
      "loss": 9.8951,
      "step": 205
    },
    {
      "epoch": 0.24707646176911544,
      "grad_norm": 0.27905702590942383,
      "learning_rate": 9.492821440087976e-05,
      "loss": 9.8936,
      "step": 206
    },
    {
      "epoch": 0.2482758620689655,
      "grad_norm": 0.28943029046058655,
      "learning_rate": 9.48337595767966e-05,
      "loss": 9.8861,
      "step": 207
    },
    {
      "epoch": 0.2494752623688156,
      "grad_norm": 0.2902354896068573,
      "learning_rate": 9.473848119073189e-05,
      "loss": 9.8864,
      "step": 208
    },
    {
      "epoch": 0.25067466266866567,
      "grad_norm": 0.2864895462989807,
      "learning_rate": 9.4642380992877e-05,
      "loss": 9.8891,
      "step": 209
    },
    {
      "epoch": 0.2518740629685157,
      "grad_norm": 0.29123008251190186,
      "learning_rate": 9.454546074851926e-05,
      "loss": 9.8855,
      "step": 210
    },
    {
      "epoch": 0.25307346326836583,
      "grad_norm": 0.30264273285865784,
      "learning_rate": 9.44477222380097e-05,
      "loss": 9.88,
      "step": 211
    },
    {
      "epoch": 0.2542728635682159,
      "grad_norm": 0.3108195662498474,
      "learning_rate": 9.434916725673024e-05,
      "loss": 9.8845,
      "step": 212
    },
    {
      "epoch": 0.255472263868066,
      "grad_norm": 0.27868786454200745,
      "learning_rate": 9.42497976150607e-05,
      "loss": 9.8742,
      "step": 213
    },
    {
      "epoch": 0.25667166416791604,
      "grad_norm": 0.27031615376472473,
      "learning_rate": 9.414961513834568e-05,
      "loss": 9.8714,
      "step": 214
    },
    {
      "epoch": 0.25787106446776614,
      "grad_norm": 0.2734402120113373,
      "learning_rate": 9.404862166686088e-05,
      "loss": 9.8673,
      "step": 215
    },
    {
      "epoch": 0.2590704647676162,
      "grad_norm": 0.28026947379112244,
      "learning_rate": 9.394681905577937e-05,
      "loss": 9.8689,
      "step": 216
    },
    {
      "epoch": 0.26026986506746624,
      "grad_norm": 0.2765568196773529,
      "learning_rate": 9.384420917513752e-05,
      "loss": 9.871,
      "step": 217
    },
    {
      "epoch": 0.26146926536731635,
      "grad_norm": 0.28846895694732666,
      "learning_rate": 9.374079390980058e-05,
      "loss": 9.8626,
      "step": 218
    },
    {
      "epoch": 0.2626686656671664,
      "grad_norm": 0.28785768151283264,
      "learning_rate": 9.363657515942814e-05,
      "loss": 9.8594,
      "step": 219
    },
    {
      "epoch": 0.2638680659670165,
      "grad_norm": 0.28602316975593567,
      "learning_rate": 9.353155483843919e-05,
      "loss": 9.8568,
      "step": 220
    },
    {
      "epoch": 0.26506746626686656,
      "grad_norm": 0.2956307530403137,
      "learning_rate": 9.342573487597696e-05,
      "loss": 9.8599,
      "step": 221
    },
    {
      "epoch": 0.26626686656671666,
      "grad_norm": 0.28586798906326294,
      "learning_rate": 9.331911721587345e-05,
      "loss": 9.8601,
      "step": 222
    },
    {
      "epoch": 0.2674662668665667,
      "grad_norm": 0.30228516459465027,
      "learning_rate": 9.321170381661383e-05,
      "loss": 9.8549,
      "step": 223
    },
    {
      "epoch": 0.26866566716641677,
      "grad_norm": 0.3037481904029846,
      "learning_rate": 9.310349665130035e-05,
      "loss": 9.8593,
      "step": 224
    },
    {
      "epoch": 0.2698650674662669,
      "grad_norm": 0.3253049850463867,
      "learning_rate": 9.299449770761611e-05,
      "loss": 9.8551,
      "step": 225
    },
    {
      "epoch": 0.2710644677661169,
      "grad_norm": 0.26533886790275574,
      "learning_rate": 9.288470898778863e-05,
      "loss": 9.8453,
      "step": 226
    },
    {
      "epoch": 0.27226386806596703,
      "grad_norm": 0.2740223705768585,
      "learning_rate": 9.277413250855296e-05,
      "loss": 9.8406,
      "step": 227
    },
    {
      "epoch": 0.2734632683658171,
      "grad_norm": 0.27240288257598877,
      "learning_rate": 9.266277030111474e-05,
      "loss": 9.8468,
      "step": 228
    },
    {
      "epoch": 0.2746626686656672,
      "grad_norm": 0.33709290623664856,
      "learning_rate": 9.255062441111281e-05,
      "loss": 9.837,
      "step": 229
    },
    {
      "epoch": 0.27586206896551724,
      "grad_norm": 0.44585081934928894,
      "learning_rate": 9.243769689858166e-05,
      "loss": 9.8394,
      "step": 230
    },
    {
      "epoch": 0.27706146926536734,
      "grad_norm": 0.3157913088798523,
      "learning_rate": 9.232398983791361e-05,
      "loss": 9.8386,
      "step": 231
    },
    {
      "epoch": 0.2782608695652174,
      "grad_norm": 0.2746964693069458,
      "learning_rate": 9.220950531782069e-05,
      "loss": 9.8347,
      "step": 232
    },
    {
      "epoch": 0.27946026986506745,
      "grad_norm": 0.28360387682914734,
      "learning_rate": 9.20942454412962e-05,
      "loss": 9.8367,
      "step": 233
    },
    {
      "epoch": 0.28065967016491755,
      "grad_norm": 0.2914506494998932,
      "learning_rate": 9.197821232557624e-05,
      "loss": 9.8285,
      "step": 234
    },
    {
      "epoch": 0.2818590704647676,
      "grad_norm": 0.29733598232269287,
      "learning_rate": 9.186140810210065e-05,
      "loss": 9.8322,
      "step": 235
    },
    {
      "epoch": 0.2830584707646177,
      "grad_norm": 0.30295151472091675,
      "learning_rate": 9.174383491647399e-05,
      "loss": 9.8292,
      "step": 236
    },
    {
      "epoch": 0.28425787106446776,
      "grad_norm": 0.32045435905456543,
      "learning_rate": 9.162549492842602e-05,
      "loss": 9.8248,
      "step": 237
    },
    {
      "epoch": 0.28545727136431787,
      "grad_norm": 0.2680381238460541,
      "learning_rate": 9.150639031177211e-05,
      "loss": 9.8168,
      "step": 238
    },
    {
      "epoch": 0.2866566716641679,
      "grad_norm": 0.27739235758781433,
      "learning_rate": 9.138652325437324e-05,
      "loss": 9.8155,
      "step": 239
    },
    {
      "epoch": 0.28785607196401797,
      "grad_norm": 0.271766722202301,
      "learning_rate": 9.12658959580959e-05,
      "loss": 9.8195,
      "step": 240
    },
    {
      "epoch": 0.2890554722638681,
      "grad_norm": 0.28111475706100464,
      "learning_rate": 9.114451063877151e-05,
      "loss": 9.8112,
      "step": 241
    },
    {
      "epoch": 0.2902548725637181,
      "grad_norm": 0.27953851222991943,
      "learning_rate": 9.102236952615589e-05,
      "loss": 9.814,
      "step": 242
    },
    {
      "epoch": 0.29145427286356823,
      "grad_norm": 0.27629002928733826,
      "learning_rate": 9.08994748638881e-05,
      "loss": 9.8131,
      "step": 243
    },
    {
      "epoch": 0.2926536731634183,
      "grad_norm": 0.2811156213283539,
      "learning_rate": 9.077582890944945e-05,
      "loss": 9.8045,
      "step": 244
    },
    {
      "epoch": 0.2938530734632684,
      "grad_norm": 0.3269944489002228,
      "learning_rate": 9.065143393412179e-05,
      "loss": 9.8066,
      "step": 245
    },
    {
      "epoch": 0.29505247376311844,
      "grad_norm": 0.3400765657424927,
      "learning_rate": 9.052629222294604e-05,
      "loss": 9.8138,
      "step": 246
    },
    {
      "epoch": 0.2962518740629685,
      "grad_norm": 0.28989219665527344,
      "learning_rate": 9.040040607467999e-05,
      "loss": 9.8014,
      "step": 247
    },
    {
      "epoch": 0.2974512743628186,
      "grad_norm": 0.33483076095581055,
      "learning_rate": 9.02737778017562e-05,
      "loss": 9.8082,
      "step": 248
    },
    {
      "epoch": 0.29865067466266865,
      "grad_norm": 0.2836906909942627,
      "learning_rate": 9.014640973023951e-05,
      "loss": 9.8131,
      "step": 249
    },
    {
      "epoch": 0.29985007496251875,
      "grad_norm": 0.3271000385284424,
      "learning_rate": 9.00183041997843e-05,
      "loss": 9.7969,
      "step": 250
    },
    {
      "epoch": 0.3010494752623688,
      "grad_norm": 0.2659075856208801,
      "learning_rate": 8.988946356359146e-05,
      "loss": 9.7947,
      "step": 251
    },
    {
      "epoch": 0.3022488755622189,
      "grad_norm": 0.2756604552268982,
      "learning_rate": 8.97598901883653e-05,
      "loss": 9.7903,
      "step": 252
    },
    {
      "epoch": 0.30344827586206896,
      "grad_norm": 0.2782731354236603,
      "learning_rate": 8.962958645426989e-05,
      "loss": 9.7927,
      "step": 253
    },
    {
      "epoch": 0.30464767616191907,
      "grad_norm": 0.28496497869491577,
      "learning_rate": 8.949855475488549e-05,
      "loss": 9.788,
      "step": 254
    },
    {
      "epoch": 0.3058470764617691,
      "grad_norm": 0.2814723253250122,
      "learning_rate": 8.936679749716452e-05,
      "loss": 9.7867,
      "step": 255
    },
    {
      "epoch": 0.30704647676161917,
      "grad_norm": 0.27949169278144836,
      "learning_rate": 8.923431710138734e-05,
      "loss": 9.7937,
      "step": 256
    },
    {
      "epoch": 0.3082458770614693,
      "grad_norm": 0.2898804843425751,
      "learning_rate": 8.910111600111785e-05,
      "loss": 9.783,
      "step": 257
    },
    {
      "epoch": 0.3094452773613193,
      "grad_norm": 0.2944411337375641,
      "learning_rate": 8.896719664315867e-05,
      "loss": 9.7809,
      "step": 258
    },
    {
      "epoch": 0.31064467766116943,
      "grad_norm": 0.29111993312835693,
      "learning_rate": 8.883256148750633e-05,
      "loss": 9.7834,
      "step": 259
    },
    {
      "epoch": 0.3118440779610195,
      "grad_norm": 0.3007718324661255,
      "learning_rate": 8.869721300730596e-05,
      "loss": 9.7882,
      "step": 260
    },
    {
      "epoch": 0.3130434782608696,
      "grad_norm": 0.28827887773513794,
      "learning_rate": 8.856115368880598e-05,
      "loss": 9.7902,
      "step": 261
    },
    {
      "epoch": 0.31424287856071964,
      "grad_norm": 0.31684109568595886,
      "learning_rate": 8.842438603131232e-05,
      "loss": 9.7778,
      "step": 262
    },
    {
      "epoch": 0.3154422788605697,
      "grad_norm": 0.32087424397468567,
      "learning_rate": 8.828691254714259e-05,
      "loss": 9.7689,
      "step": 263
    },
    {
      "epoch": 0.3166416791604198,
      "grad_norm": 0.27183249592781067,
      "learning_rate": 8.814873576157987e-05,
      "loss": 9.7738,
      "step": 264
    },
    {
      "epoch": 0.31784107946026985,
      "grad_norm": 0.27800437808036804,
      "learning_rate": 8.800985821282637e-05,
      "loss": 9.7711,
      "step": 265
    },
    {
      "epoch": 0.31904047976011995,
      "grad_norm": 0.28154927492141724,
      "learning_rate": 8.787028245195676e-05,
      "loss": 9.7662,
      "step": 266
    },
    {
      "epoch": 0.32023988005997,
      "grad_norm": 0.2830137014389038,
      "learning_rate": 8.773001104287137e-05,
      "loss": 9.767,
      "step": 267
    },
    {
      "epoch": 0.3214392803598201,
      "grad_norm": 0.27607715129852295,
      "learning_rate": 8.758904656224904e-05,
      "loss": 9.7658,
      "step": 268
    },
    {
      "epoch": 0.32263868065967016,
      "grad_norm": 0.2993113696575165,
      "learning_rate": 8.744739159949981e-05,
      "loss": 9.7659,
      "step": 269
    },
    {
      "epoch": 0.3238380809595202,
      "grad_norm": 0.316902220249176,
      "learning_rate": 8.730504875671732e-05,
      "loss": 9.7573,
      "step": 270
    },
    {
      "epoch": 0.3250374812593703,
      "grad_norm": 0.3067699670791626,
      "learning_rate": 8.716202064863111e-05,
      "loss": 9.7598,
      "step": 271
    },
    {
      "epoch": 0.32623688155922037,
      "grad_norm": 0.3003675937652588,
      "learning_rate": 8.701830990255843e-05,
      "loss": 9.7639,
      "step": 272
    },
    {
      "epoch": 0.3274362818590705,
      "grad_norm": 0.2981228232383728,
      "learning_rate": 8.687391915835616e-05,
      "loss": 9.7576,
      "step": 273
    },
    {
      "epoch": 0.32863568215892053,
      "grad_norm": 0.2995956242084503,
      "learning_rate": 8.672885106837216e-05,
      "loss": 9.7714,
      "step": 274
    },
    {
      "epoch": 0.32983508245877063,
      "grad_norm": 0.30962345004081726,
      "learning_rate": 8.658310829739665e-05,
      "loss": 9.7645,
      "step": 275
    },
    {
      "epoch": 0.3310344827586207,
      "grad_norm": 0.26187556982040405,
      "learning_rate": 8.643669352261321e-05,
      "loss": 9.7506,
      "step": 276
    },
    {
      "epoch": 0.3322338830584708,
      "grad_norm": 0.276991605758667,
      "learning_rate": 8.628960943354965e-05,
      "loss": 9.7492,
      "step": 277
    },
    {
      "epoch": 0.33343328335832084,
      "grad_norm": 0.2857518196105957,
      "learning_rate": 8.614185873202851e-05,
      "loss": 9.7469,
      "step": 278
    },
    {
      "epoch": 0.3346326836581709,
      "grad_norm": 0.28504660725593567,
      "learning_rate": 8.599344413211755e-05,
      "loss": 9.7518,
      "step": 279
    },
    {
      "epoch": 0.335832083958021,
      "grad_norm": 0.27488988637924194,
      "learning_rate": 8.584436836007981e-05,
      "loss": 9.7501,
      "step": 280
    },
    {
      "epoch": 0.33703148425787105,
      "grad_norm": 0.29049110412597656,
      "learning_rate": 8.569463415432356e-05,
      "loss": 9.7418,
      "step": 281
    },
    {
      "epoch": 0.33823088455772116,
      "grad_norm": 0.2897820472717285,
      "learning_rate": 8.554424426535201e-05,
      "loss": 9.7481,
      "step": 282
    },
    {
      "epoch": 0.3394302848575712,
      "grad_norm": 0.28421247005462646,
      "learning_rate": 8.539320145571276e-05,
      "loss": 9.7456,
      "step": 283
    },
    {
      "epoch": 0.3406296851574213,
      "grad_norm": 0.28690865635871887,
      "learning_rate": 8.524150849994707e-05,
      "loss": 9.7501,
      "step": 284
    },
    {
      "epoch": 0.34182908545727136,
      "grad_norm": 0.2818574905395508,
      "learning_rate": 8.50891681845389e-05,
      "loss": 9.7422,
      "step": 285
    },
    {
      "epoch": 0.3430284857571214,
      "grad_norm": 0.5660536885261536,
      "learning_rate": 8.493618330786365e-05,
      "loss": 9.7497,
      "step": 286
    },
    {
      "epoch": 0.3442278860569715,
      "grad_norm": 0.31119802594184875,
      "learning_rate": 8.47825566801369e-05,
      "loss": 9.7429,
      "step": 287
    },
    {
      "epoch": 0.3454272863568216,
      "grad_norm": 0.2645992636680603,
      "learning_rate": 8.462829112336266e-05,
      "loss": 9.7354,
      "step": 288
    },
    {
      "epoch": 0.3466266866566717,
      "grad_norm": 0.27782142162323,
      "learning_rate": 8.44733894712816e-05,
      "loss": 9.7309,
      "step": 289
    },
    {
      "epoch": 0.34782608695652173,
      "grad_norm": 0.27114003896713257,
      "learning_rate": 8.431785456931898e-05,
      "loss": 9.7329,
      "step": 290
    },
    {
      "epoch": 0.34902548725637184,
      "grad_norm": 0.27776530385017395,
      "learning_rate": 8.416168927453236e-05,
      "loss": 9.7294,
      "step": 291
    },
    {
      "epoch": 0.3502248875562219,
      "grad_norm": 0.2819390594959259,
      "learning_rate": 8.400489645555914e-05,
      "loss": 9.7324,
      "step": 292
    },
    {
      "epoch": 0.35142428785607194,
      "grad_norm": 0.2786363363265991,
      "learning_rate": 8.384747899256386e-05,
      "loss": 9.7327,
      "step": 293
    },
    {
      "epoch": 0.35262368815592204,
      "grad_norm": 0.29060226678848267,
      "learning_rate": 8.368943977718528e-05,
      "loss": 9.7265,
      "step": 294
    },
    {
      "epoch": 0.3538230884557721,
      "grad_norm": 0.28789106011390686,
      "learning_rate": 8.353078171248335e-05,
      "loss": 9.7269,
      "step": 295
    },
    {
      "epoch": 0.3550224887556222,
      "grad_norm": 0.28383123874664307,
      "learning_rate": 8.337150771288572e-05,
      "loss": 9.7357,
      "step": 296
    },
    {
      "epoch": 0.35622188905547225,
      "grad_norm": 0.28761202096939087,
      "learning_rate": 8.32116207041343e-05,
      "loss": 9.7277,
      "step": 297
    },
    {
      "epoch": 0.35742128935532236,
      "grad_norm": 0.29686328768730164,
      "learning_rate": 8.30511236232316e-05,
      "loss": 9.7278,
      "step": 298
    },
    {
      "epoch": 0.3586206896551724,
      "grad_norm": 0.3019421100616455,
      "learning_rate": 8.289001941838659e-05,
      "loss": 9.7348,
      "step": 299
    },
    {
      "epoch": 0.3598200899550225,
      "grad_norm": 0.3201374411582947,
      "learning_rate": 8.27283110489607e-05,
      "loss": 9.7275,
      "step": 300
    },
    {
      "epoch": 0.36101949025487257,
      "grad_norm": 0.2733359932899475,
      "learning_rate": 8.256600148541339e-05,
      "loss": 9.7121,
      "step": 301
    },
    {
      "epoch": 0.3622188905547226,
      "grad_norm": 0.2780385911464691,
      "learning_rate": 8.240309370924759e-05,
      "loss": 9.7179,
      "step": 302
    },
    {
      "epoch": 0.3634182908545727,
      "grad_norm": 0.27753978967666626,
      "learning_rate": 8.223959071295493e-05,
      "loss": 9.7121,
      "step": 303
    },
    {
      "epoch": 0.3646176911544228,
      "grad_norm": 0.2738651633262634,
      "learning_rate": 8.207549549996083e-05,
      "loss": 9.7152,
      "step": 304
    },
    {
      "epoch": 0.3658170914542729,
      "grad_norm": 0.4075029790401459,
      "learning_rate": 8.191081108456921e-05,
      "loss": 9.7168,
      "step": 305
    },
    {
      "epoch": 0.36701649175412293,
      "grad_norm": 0.35438272356987,
      "learning_rate": 8.174554049190725e-05,
      "loss": 9.7143,
      "step": 306
    },
    {
      "epoch": 0.36821589205397304,
      "grad_norm": 0.46225133538246155,
      "learning_rate": 8.157968675786972e-05,
      "loss": 9.7133,
      "step": 307
    },
    {
      "epoch": 0.3694152923538231,
      "grad_norm": 0.2845197319984436,
      "learning_rate": 8.141325292906326e-05,
      "loss": 9.7149,
      "step": 308
    },
    {
      "epoch": 0.37061469265367314,
      "grad_norm": 0.29232627153396606,
      "learning_rate": 8.12462420627504e-05,
      "loss": 9.7107,
      "step": 309
    },
    {
      "epoch": 0.37181409295352325,
      "grad_norm": 0.28868958353996277,
      "learning_rate": 8.107865722679347e-05,
      "loss": 9.7176,
      "step": 310
    },
    {
      "epoch": 0.3730134932533733,
      "grad_norm": 0.3159126341342926,
      "learning_rate": 8.091050149959808e-05,
      "loss": 9.713,
      "step": 311
    },
    {
      "epoch": 0.3742128935532234,
      "grad_norm": 0.3219504952430725,
      "learning_rate": 8.074177797005678e-05,
      "loss": 9.7166,
      "step": 312
    },
    {
      "epoch": 0.37541229385307345,
      "grad_norm": 0.2772824168205261,
      "learning_rate": 8.057248973749215e-05,
      "loss": 9.7027,
      "step": 313
    },
    {
      "epoch": 0.37661169415292356,
      "grad_norm": 0.2774364948272705,
      "learning_rate": 8.040263991159995e-05,
      "loss": 9.7026,
      "step": 314
    },
    {
      "epoch": 0.3778110944527736,
      "grad_norm": 0.2747974693775177,
      "learning_rate": 8.0232231612392e-05,
      "loss": 9.702,
      "step": 315
    },
    {
      "epoch": 0.37901049475262366,
      "grad_norm": 0.2756046652793884,
      "learning_rate": 8.006126797013883e-05,
      "loss": 9.7022,
      "step": 316
    },
    {
      "epoch": 0.38020989505247377,
      "grad_norm": 0.269083172082901,
      "learning_rate": 7.98897521253122e-05,
      "loss": 9.7024,
      "step": 317
    },
    {
      "epoch": 0.3814092953523238,
      "grad_norm": 0.2777722477912903,
      "learning_rate": 7.97176872285274e-05,
      "loss": 9.7029,
      "step": 318
    },
    {
      "epoch": 0.3826086956521739,
      "grad_norm": 0.2875417172908783,
      "learning_rate": 7.954507644048544e-05,
      "loss": 9.7008,
      "step": 319
    },
    {
      "epoch": 0.383808095952024,
      "grad_norm": 0.29414165019989014,
      "learning_rate": 7.937192293191485e-05,
      "loss": 9.7004,
      "step": 320
    },
    {
      "epoch": 0.3850074962518741,
      "grad_norm": 0.2859031558036804,
      "learning_rate": 7.919822988351357e-05,
      "loss": 9.7048,
      "step": 321
    },
    {
      "epoch": 0.38620689655172413,
      "grad_norm": 0.2967624068260193,
      "learning_rate": 7.902400048589051e-05,
      "loss": 9.7018,
      "step": 322
    },
    {
      "epoch": 0.38740629685157424,
      "grad_norm": 0.40655517578125,
      "learning_rate": 7.884923793950685e-05,
      "loss": 9.693,
      "step": 323
    },
    {
      "epoch": 0.3886056971514243,
      "grad_norm": 0.3629460632801056,
      "learning_rate": 7.86739454546173e-05,
      "loss": 9.7021,
      "step": 324
    },
    {
      "epoch": 0.38980509745127434,
      "grad_norm": 0.3573906421661377,
      "learning_rate": 7.84981262512112e-05,
      "loss": 9.7026,
      "step": 325
    },
    {
      "epoch": 0.39100449775112445,
      "grad_norm": 0.2747887969017029,
      "learning_rate": 7.832178355895326e-05,
      "loss": 9.6855,
      "step": 326
    },
    {
      "epoch": 0.3922038980509745,
      "grad_norm": 0.27436476945877075,
      "learning_rate": 7.814492061712428e-05,
      "loss": 9.6864,
      "step": 327
    },
    {
      "epoch": 0.3934032983508246,
      "grad_norm": 0.2805567681789398,
      "learning_rate": 7.796754067456168e-05,
      "loss": 9.6899,
      "step": 328
    },
    {
      "epoch": 0.39460269865067465,
      "grad_norm": 0.2744491696357727,
      "learning_rate": 7.778964698959972e-05,
      "loss": 9.6882,
      "step": 329
    },
    {
      "epoch": 0.39580209895052476,
      "grad_norm": 0.2762869894504547,
      "learning_rate": 7.761124283000983e-05,
      "loss": 9.6909,
      "step": 330
    },
    {
      "epoch": 0.3970014992503748,
      "grad_norm": 0.27481362223625183,
      "learning_rate": 7.743233147294035e-05,
      "loss": 9.6929,
      "step": 331
    },
    {
      "epoch": 0.39820089955022486,
      "grad_norm": 0.28461942076683044,
      "learning_rate": 7.725291620485653e-05,
      "loss": 9.6901,
      "step": 332
    },
    {
      "epoch": 0.39940029985007497,
      "grad_norm": 0.2874203026294708,
      "learning_rate": 7.707300032148004e-05,
      "loss": 9.6879,
      "step": 333
    },
    {
      "epoch": 0.400599700149925,
      "grad_norm": 0.2960827052593231,
      "learning_rate": 7.689258712772851e-05,
      "loss": 9.6883,
      "step": 334
    },
    {
      "epoch": 0.4017991004497751,
      "grad_norm": 0.2913392186164856,
      "learning_rate": 7.671167993765474e-05,
      "loss": 9.6886,
      "step": 335
    },
    {
      "epoch": 0.4029985007496252,
      "grad_norm": 0.2986817955970764,
      "learning_rate": 7.653028207438589e-05,
      "loss": 9.6875,
      "step": 336
    },
    {
      "epoch": 0.4041979010494753,
      "grad_norm": 0.31125518679618835,
      "learning_rate": 7.634839687006242e-05,
      "loss": 9.693,
      "step": 337
    },
    {
      "epoch": 0.40539730134932533,
      "grad_norm": 0.27948254346847534,
      "learning_rate": 7.616602766577683e-05,
      "loss": 9.677,
      "step": 338
    },
    {
      "epoch": 0.4065967016491754,
      "grad_norm": 0.2667854428291321,
      "learning_rate": 7.59831778115124e-05,
      "loss": 9.6728,
      "step": 339
    },
    {
      "epoch": 0.4077961019490255,
      "grad_norm": 0.26580169796943665,
      "learning_rate": 7.579985066608153e-05,
      "loss": 9.6734,
      "step": 340
    },
    {
      "epoch": 0.40899550224887554,
      "grad_norm": 0.27677300572395325,
      "learning_rate": 7.56160495970641e-05,
      "loss": 9.6744,
      "step": 341
    },
    {
      "epoch": 0.41019490254872565,
      "grad_norm": 0.28340858221054077,
      "learning_rate": 7.543177798074564e-05,
      "loss": 9.6755,
      "step": 342
    },
    {
      "epoch": 0.4113943028485757,
      "grad_norm": 0.28086498379707336,
      "learning_rate": 7.52470392020552e-05,
      "loss": 9.6741,
      "step": 343
    },
    {
      "epoch": 0.4125937031484258,
      "grad_norm": 0.2807992100715637,
      "learning_rate": 7.506183665450336e-05,
      "loss": 9.6789,
      "step": 344
    },
    {
      "epoch": 0.41379310344827586,
      "grad_norm": 0.27423274517059326,
      "learning_rate": 7.487617374011968e-05,
      "loss": 9.6791,
      "step": 345
    },
    {
      "epoch": 0.41499250374812596,
      "grad_norm": 0.2901366353034973,
      "learning_rate": 7.469005386939036e-05,
      "loss": 9.6742,
      "step": 346
    },
    {
      "epoch": 0.416191904047976,
      "grad_norm": 0.33871832489967346,
      "learning_rate": 7.45034804611955e-05,
      "loss": 9.6731,
      "step": 347
    },
    {
      "epoch": 0.41739130434782606,
      "grad_norm": 0.3808429539203644,
      "learning_rate": 7.43164569427464e-05,
      "loss": 9.6811,
      "step": 348
    },
    {
      "epoch": 0.41859070464767617,
      "grad_norm": 0.37340763211250305,
      "learning_rate": 7.412898674952248e-05,
      "loss": 9.6826,
      "step": 349
    },
    {
      "epoch": 0.4197901049475262,
      "grad_norm": 0.31507405638694763,
      "learning_rate": 7.394107332520828e-05,
      "loss": 9.6792,
      "step": 350
    },
    {
      "epoch": 0.4209895052473763,
      "grad_norm": 0.2747836410999298,
      "learning_rate": 7.37527201216301e-05,
      "loss": 9.6618,
      "step": 351
    },
    {
      "epoch": 0.4221889055472264,
      "grad_norm": 0.26785317063331604,
      "learning_rate": 7.356393059869272e-05,
      "loss": 9.668,
      "step": 352
    },
    {
      "epoch": 0.4233883058470765,
      "grad_norm": 0.27754732966423035,
      "learning_rate": 7.337470822431572e-05,
      "loss": 9.6617,
      "step": 353
    },
    {
      "epoch": 0.42458770614692654,
      "grad_norm": 0.2815973460674286,
      "learning_rate": 7.318505647436986e-05,
      "loss": 9.6655,
      "step": 354
    },
    {
      "epoch": 0.4257871064467766,
      "grad_norm": 0.27644169330596924,
      "learning_rate": 7.299497883261319e-05,
      "loss": 9.6683,
      "step": 355
    },
    {
      "epoch": 0.4269865067466267,
      "grad_norm": 0.27770036458969116,
      "learning_rate": 7.28044787906271e-05,
      "loss": 9.6686,
      "step": 356
    },
    {
      "epoch": 0.42818590704647674,
      "grad_norm": 0.28763288259506226,
      "learning_rate": 7.261355984775208e-05,
      "loss": 9.6643,
      "step": 357
    },
    {
      "epoch": 0.42938530734632685,
      "grad_norm": 0.28251275420188904,
      "learning_rate": 7.242222551102356e-05,
      "loss": 9.6609,
      "step": 358
    },
    {
      "epoch": 0.4305847076461769,
      "grad_norm": 0.29022759199142456,
      "learning_rate": 7.223047929510743e-05,
      "loss": 9.6656,
      "step": 359
    },
    {
      "epoch": 0.431784107946027,
      "grad_norm": 0.2947325110435486,
      "learning_rate": 7.20383247222355e-05,
      "loss": 9.666,
      "step": 360
    },
    {
      "epoch": 0.43298350824587706,
      "grad_norm": 0.29398787021636963,
      "learning_rate": 7.184576532214077e-05,
      "loss": 9.6692,
      "step": 361
    },
    {
      "epoch": 0.4341829085457271,
      "grad_norm": 0.30600595474243164,
      "learning_rate": 7.16528046319926e-05,
      "loss": 9.6675,
      "step": 362
    },
    {
      "epoch": 0.4353823088455772,
      "grad_norm": 0.26775041222572327,
      "learning_rate": 7.145944619633176e-05,
      "loss": 9.6627,
      "step": 363
    },
    {
      "epoch": 0.43658170914542727,
      "grad_norm": 0.2672569155693054,
      "learning_rate": 7.126569356700529e-05,
      "loss": 9.6575,
      "step": 364
    },
    {
      "epoch": 0.43778110944527737,
      "grad_norm": 0.2710895538330078,
      "learning_rate": 7.107155030310126e-05,
      "loss": 9.6538,
      "step": 365
    },
    {
      "epoch": 0.4389805097451274,
      "grad_norm": 0.2715386152267456,
      "learning_rate": 7.087701997088345e-05,
      "loss": 9.6533,
      "step": 366
    },
    {
      "epoch": 0.44017991004497753,
      "grad_norm": 0.2757709324359894,
      "learning_rate": 7.068210614372568e-05,
      "loss": 9.6559,
      "step": 367
    },
    {
      "epoch": 0.4413793103448276,
      "grad_norm": 0.27887001633644104,
      "learning_rate": 7.048681240204641e-05,
      "loss": 9.6604,
      "step": 368
    },
    {
      "epoch": 0.4425787106446777,
      "grad_norm": 0.2874414920806885,
      "learning_rate": 7.029114233324276e-05,
      "loss": 9.6537,
      "step": 369
    },
    {
      "epoch": 0.44377811094452774,
      "grad_norm": 0.2877376079559326,
      "learning_rate": 7.009509953162471e-05,
      "loss": 9.6594,
      "step": 370
    },
    {
      "epoch": 0.4449775112443778,
      "grad_norm": 0.2886502146720886,
      "learning_rate": 6.989868759834908e-05,
      "loss": 9.6522,
      "step": 371
    },
    {
      "epoch": 0.4461769115442279,
      "grad_norm": 0.28471824526786804,
      "learning_rate": 6.97019101413533e-05,
      "loss": 9.6611,
      "step": 372
    },
    {
      "epoch": 0.44737631184407795,
|
"grad_norm": 0.2849258780479431, |
|
"learning_rate": 6.950477077528926e-05, |
|
"loss": 9.6583, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.44857571214392805, |
|
"grad_norm": 0.3675067126750946, |
|
"learning_rate": 6.93072731214568e-05, |
|
"loss": 9.6677, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.4497751124437781, |
|
"grad_norm": 0.6856014728546143, |
|
"learning_rate": 6.910942080773724e-05, |
|
"loss": 9.6579, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.4509745127436282, |
|
"grad_norm": 0.27253374457359314, |
|
"learning_rate": 6.891121746852674e-05, |
|
"loss": 9.6466, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.45217391304347826, |
|
"grad_norm": 0.26753801107406616, |
|
"learning_rate": 6.871266674466955e-05, |
|
"loss": 9.6491, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.4533733133433283, |
|
"grad_norm": 0.2716609239578247, |
|
"learning_rate": 6.851377228339106e-05, |
|
"loss": 9.6484, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.4545727136431784, |
|
"grad_norm": 0.2831350266933441, |
|
"learning_rate": 6.831453773823091e-05, |
|
"loss": 9.6464, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.45577211394302847, |
|
"grad_norm": 0.28324607014656067, |
|
"learning_rate": 6.811496676897578e-05, |
|
"loss": 9.6475, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.4569715142428786, |
|
"grad_norm": 0.27571067214012146, |
|
"learning_rate": 6.791506304159221e-05, |
|
"loss": 9.645, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.4581709145427286, |
|
"grad_norm": 0.28332218527793884, |
|
"learning_rate": 6.771483022815925e-05, |
|
"loss": 9.6559, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.45937031484257873, |
|
"grad_norm": 0.2815491855144501, |
|
"learning_rate": 6.751427200680108e-05, |
|
"loss": 9.6518, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.4605697151424288, |
|
"grad_norm": 0.28399744629859924, |
|
"learning_rate": 6.731339206161928e-05, |
|
"loss": 9.6512, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.4617691154422789, |
|
"grad_norm": 0.288397878408432, |
|
"learning_rate": 6.711219408262527e-05, |
|
"loss": 9.6452, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.46296851574212894, |
|
"grad_norm": 0.29081466794013977, |
|
"learning_rate": 6.691068176567257e-05, |
|
"loss": 9.66, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.464167916041979, |
|
"grad_norm": 0.29959914088249207, |
|
"learning_rate": 6.670885881238877e-05, |
|
"loss": 9.6601, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.4653673163418291, |
|
"grad_norm": 0.27491918206214905, |
|
"learning_rate": 6.650672893010768e-05, |
|
"loss": 9.6448, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.46656671664167915, |
|
"grad_norm": 0.2780735194683075, |
|
"learning_rate": 6.630429583180112e-05, |
|
"loss": 9.6355, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.46776611694152925, |
|
"grad_norm": 0.2666113078594208, |
|
"learning_rate": 6.610156323601075e-05, |
|
"loss": 9.6384, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.4689655172413793, |
|
"grad_norm": 0.2784653902053833, |
|
"learning_rate": 6.589853486677981e-05, |
|
"loss": 9.6384, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.4701649175412294, |
|
"grad_norm": 0.28066155314445496, |
|
"learning_rate": 6.569521445358464e-05, |
|
"loss": 9.6417, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.47136431784107946, |
|
"grad_norm": 0.27688291668891907, |
|
"learning_rate": 6.549160573126623e-05, |
|
"loss": 9.6387, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.4725637181409295, |
|
"grad_norm": 0.279243141412735, |
|
"learning_rate": 6.528771243996157e-05, |
|
"loss": 9.645, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.4737631184407796, |
|
"grad_norm": 0.2818412184715271, |
|
"learning_rate": 6.508353832503494e-05, |
|
"loss": 9.6442, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.47496251874062967, |
|
"grad_norm": 0.2805486023426056, |
|
"learning_rate": 6.48790871370092e-05, |
|
"loss": 9.6417, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.4761619190404798, |
|
"grad_norm": 0.2866521179676056, |
|
"learning_rate": 6.467436263149678e-05, |
|
"loss": 9.6496, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.4773613193403298, |
|
"grad_norm": 0.29199638962745667, |
|
"learning_rate": 6.446936856913078e-05, |
|
"loss": 9.6433, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.47856071964017993, |
|
"grad_norm": 0.29411137104034424, |
|
"learning_rate": 6.426410871549581e-05, |
|
"loss": 9.6499, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.47976011994003, |
|
"grad_norm": 0.3119426369667053, |
|
"learning_rate": 6.405858684105892e-05, |
|
"loss": 9.655, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.48095952023988003, |
|
"grad_norm": 0.2667071521282196, |
|
"learning_rate": 6.385280672110024e-05, |
|
"loss": 9.6329, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.48215892053973014, |
|
"grad_norm": 0.2752053737640381, |
|
"learning_rate": 6.364677213564365e-05, |
|
"loss": 9.6306, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.4833583208395802, |
|
"grad_norm": 0.27557632327079773, |
|
"learning_rate": 6.344048686938745e-05, |
|
"loss": 9.6324, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.4845577211394303, |
|
"grad_norm": 0.2749324142932892, |
|
"learning_rate": 6.323395471163467e-05, |
|
"loss": 9.639, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.48575712143928035, |
|
"grad_norm": 0.27776116132736206, |
|
"learning_rate": 6.30271794562236e-05, |
|
"loss": 9.6358, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.48695652173913045, |
|
"grad_norm": 0.27685850858688354, |
|
"learning_rate": 6.282016490145803e-05, |
|
"loss": 9.6354, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.4881559220389805, |
|
"grad_norm": 0.2896622121334076, |
|
"learning_rate": 6.261291485003751e-05, |
|
"loss": 9.6398, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.4893553223388306, |
|
"grad_norm": 0.34289979934692383, |
|
"learning_rate": 6.240543310898746e-05, |
|
"loss": 9.6447, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.49055472263868066, |
|
"grad_norm": 0.30854368209838867, |
|
"learning_rate": 6.219772348958927e-05, |
|
"loss": 9.6312, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.4917541229385307, |
|
"grad_norm": 0.3343330919742584, |
|
"learning_rate": 6.198978980731034e-05, |
|
"loss": 9.6383, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.4929535232383808, |
|
"grad_norm": 0.2979169487953186, |
|
"learning_rate": 6.178163588173381e-05, |
|
"loss": 9.6352, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.49415292353823087, |
|
"grad_norm": 0.29163146018981934, |
|
"learning_rate": 6.157326553648862e-05, |
|
"loss": 9.6349, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.495352323838081, |
|
"grad_norm": 0.32137271761894226, |
|
"learning_rate": 6.136468259917917e-05, |
|
"loss": 9.6287, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.496551724137931, |
|
"grad_norm": 0.26861318945884705, |
|
"learning_rate": 6.115589090131497e-05, |
|
"loss": 9.6261, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.49775112443778113, |
|
"grad_norm": 0.2670891582965851, |
|
"learning_rate": 6.094689427824031e-05, |
|
"loss": 9.6272, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.4989505247376312, |
|
"grad_norm": 0.2772047519683838, |
|
"learning_rate": 6.073769656906385e-05, |
|
"loss": 9.6257, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.5001499250374812, |
|
"grad_norm": 0.27719056606292725, |
|
"learning_rate": 6.052830161658799e-05, |
|
"loss": 9.6287, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.5013493253373313, |
|
"grad_norm": 0.27757909893989563, |
|
"learning_rate": 6.031871326723837e-05, |
|
"loss": 9.6331, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.5025487256371814, |
|
"grad_norm": 0.28167879581451416, |
|
"learning_rate": 6.010893537099316e-05, |
|
"loss": 9.6289, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.5037481259370314, |
|
"grad_norm": 0.28175151348114014, |
|
"learning_rate": 5.9898971781312384e-05, |
|
"loss": 9.6342, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.5049475262368815, |
|
"grad_norm": 0.2766707241535187, |
|
"learning_rate": 5.9688826355067105e-05, |
|
"loss": 9.6337, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.5061469265367317, |
|
"grad_norm": 0.29078051447868347, |
|
"learning_rate": 5.9478502952468595e-05, |
|
"loss": 9.6292, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.5073463268365818, |
|
"grad_norm": 0.2969301640987396, |
|
"learning_rate": 5.92680054369974e-05, |
|
"loss": 9.6297, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.5085457271364318, |
|
"grad_norm": 0.29746097326278687, |
|
"learning_rate": 5.905733767533238e-05, |
|
"loss": 9.6367, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.5097451274362819, |
|
"grad_norm": 0.32283881306648254, |
|
"learning_rate": 5.8846503537279715e-05, |
|
"loss": 9.6347, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.510944527736132, |
|
"grad_norm": 0.2667362689971924, |
|
"learning_rate": 5.863550689570179e-05, |
|
"loss": 9.6198, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.512143928035982, |
|
"grad_norm": 0.274853378534317, |
|
"learning_rate": 5.842435162644601e-05, |
|
"loss": 9.6217, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.5133433283358321, |
|
"grad_norm": 0.2751438319683075, |
|
"learning_rate": 5.821304160827371e-05, |
|
"loss": 9.6246, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.5145427286356822, |
|
"grad_norm": 0.279313325881958, |
|
"learning_rate": 5.8001580722788795e-05, |
|
"loss": 9.6222, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.5157421289355323, |
|
"grad_norm": 0.27348318696022034, |
|
"learning_rate": 5.7789972854366536e-05, |
|
"loss": 9.6226, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.5169415292353823, |
|
"grad_norm": 0.2807561457157135, |
|
"learning_rate": 5.757822189008214e-05, |
|
"loss": 9.6246, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.5181409295352324, |
|
"grad_norm": 0.28267139196395874, |
|
"learning_rate": 5.7366331719639366e-05, |
|
"loss": 9.6234, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.5193403298350825, |
|
"grad_norm": 0.28372693061828613, |
|
"learning_rate": 5.715430623529909e-05, |
|
"loss": 9.6304, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.5205397301349325, |
|
"grad_norm": 0.2916060984134674, |
|
"learning_rate": 5.6942149331807836e-05, |
|
"loss": 9.6256, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.5217391304347826, |
|
"grad_norm": 0.30614909529685974, |
|
"learning_rate": 5.6729864906326136e-05, |
|
"loss": 9.6258, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.5229385307346327, |
|
"grad_norm": 0.32992836833000183, |
|
"learning_rate": 5.651745685835707e-05, |
|
"loss": 9.6317, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.5241379310344828, |
|
"grad_norm": 0.3490801751613617, |
|
"learning_rate": 5.630492908967451e-05, |
|
"loss": 9.6334, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.5253373313343328, |
|
"grad_norm": 0.2723431885242462, |
|
"learning_rate": 5.609228550425154e-05, |
|
"loss": 9.6261, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.5265367316341829, |
|
"grad_norm": 0.26900023221969604, |
|
"learning_rate": 5.5879530008188716e-05, |
|
"loss": 9.6217, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.527736131934033, |
|
"grad_norm": 0.26944032311439514, |
|
"learning_rate": 5.566666650964228e-05, |
|
"loss": 9.6232, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.528935532233883, |
|
"grad_norm": 0.2752003073692322, |
|
"learning_rate": 5.545369891875241e-05, |
|
"loss": 9.6213, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.5301349325337331, |
|
"grad_norm": 0.27395889163017273, |
|
"learning_rate": 5.524063114757139e-05, |
|
"loss": 9.6238, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.5313343328335832, |
|
"grad_norm": 0.2798815965652466, |
|
"learning_rate": 5.5027467109991705e-05, |
|
"loss": 9.6211, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.5325337331334333, |
|
"grad_norm": 0.2802503705024719, |
|
"learning_rate": 5.481421072167423e-05, |
|
"loss": 9.6214, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.5337331334332833, |
|
"grad_norm": 0.2843863368034363, |
|
"learning_rate": 5.4600865899976225e-05, |
|
"loss": 9.6235, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.5349325337331334, |
|
"grad_norm": 0.28727102279663086, |
|
"learning_rate": 5.43874365638794e-05, |
|
"loss": 9.6231, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.5361319340329835, |
|
"grad_norm": 0.2915343940258026, |
|
"learning_rate": 5.417392663391796e-05, |
|
"loss": 9.6246, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.5373313343328335, |
|
"grad_norm": 0.295317679643631, |
|
"learning_rate": 5.3960340032106515e-05, |
|
"loss": 9.6214, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.5385307346326836, |
|
"grad_norm": 0.29819992184638977, |
|
"learning_rate": 5.374668068186809e-05, |
|
"loss": 9.6253, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.5397301349325337, |
|
"grad_norm": 0.31375864148139954, |
|
"learning_rate": 5.3532952507962066e-05, |
|
"loss": 9.6318, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.5409295352323839, |
|
"grad_norm": 0.27647456526756287, |
|
"learning_rate": 5.3319159436412046e-05, |
|
"loss": 9.616, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.5421289355322338, |
|
"grad_norm": 0.27085641026496887, |
|
"learning_rate": 5.310530539443375e-05, |
|
"loss": 9.6163, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.543328335832084, |
|
"grad_norm": 0.2696115970611572, |
|
"learning_rate": 5.28913943103629e-05, |
|
"loss": 9.6192, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.5445277361319341, |
|
"grad_norm": 0.281088650226593, |
|
"learning_rate": 5.2677430113583005e-05, |
|
"loss": 9.6158, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.545727136431784, |
|
"grad_norm": 0.27259501814842224, |
|
"learning_rate": 5.246341673445323e-05, |
|
"loss": 9.6236, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.5469265367316342, |
|
"grad_norm": 0.2712749242782593, |
|
"learning_rate": 5.22493581042362e-05, |
|
"loss": 9.6263, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.5481259370314843, |
|
"grad_norm": 0.2738138437271118, |
|
"learning_rate": 5.203525815502574e-05, |
|
"loss": 9.6257, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.5493253373313344, |
|
"grad_norm": 0.2845352292060852, |
|
"learning_rate": 5.182112081967466e-05, |
|
"loss": 9.6207, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.5505247376311844, |
|
"grad_norm": 0.2933952510356903, |
|
"learning_rate": 5.160695003172259e-05, |
|
"loss": 9.6218, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.5517241379310345, |
|
"grad_norm": 0.32854798436164856, |
|
"learning_rate": 5.13927497253236e-05, |
|
"loss": 9.6258, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.5529235382308846, |
|
"grad_norm": 0.32506442070007324, |
|
"learning_rate": 5.1178523835174e-05, |
|
"loss": 9.6305, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.5541229385307347, |
|
"grad_norm": 0.31355130672454834, |
|
"learning_rate": 5.0964276296440075e-05, |
|
"loss": 9.6294, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.5553223388305847, |
|
"grad_norm": 0.27378547191619873, |
|
"learning_rate": 5.075001104468576e-05, |
|
"loss": 9.6173, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.5565217391304348, |
|
"grad_norm": 0.2689533829689026, |
|
"learning_rate": 5.053573201580039e-05, |
|
"loss": 9.6162, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.5577211394302849, |
|
"grad_norm": 0.27102944254875183, |
|
"learning_rate": 5.032144314592633e-05, |
|
"loss": 9.6134, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.5589205397301349, |
|
"grad_norm": 0.2733670473098755, |
|
"learning_rate": 5.010714837138675e-05, |
|
"loss": 9.6183, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.560119940029985, |
|
"grad_norm": 0.2813307046890259, |
|
"learning_rate": 4.989285162861326e-05, |
|
"loss": 9.6184, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.5613193403298351, |
|
"grad_norm": 0.2758241593837738, |
|
"learning_rate": 4.967855685407368e-05, |
|
"loss": 9.6172, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.5625187406296852, |
|
"grad_norm": 0.28401094675064087, |
|
"learning_rate": 4.946426798419962e-05, |
|
"loss": 9.616, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.5637181409295352, |
|
"grad_norm": 0.28821876645088196, |
|
"learning_rate": 4.924998895531425e-05, |
|
"loss": 9.6195, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.5649175412293853, |
|
"grad_norm": 0.2845361828804016, |
|
"learning_rate": 4.903572370355993e-05, |
|
"loss": 9.6186, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.5661169415292354, |
|
"grad_norm": 0.29093310236930847, |
|
"learning_rate": 4.882147616482602e-05, |
|
"loss": 9.619, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.5673163418290854, |
|
"grad_norm": 0.28855013847351074, |
|
"learning_rate": 4.8607250274676415e-05, |
|
"loss": 9.6224, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.5685157421289355, |
|
"grad_norm": 0.3042353689670563, |
|
"learning_rate": 4.839304996827741e-05, |
|
"loss": 9.6186, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.5697151424287856, |
|
"grad_norm": 0.32164472341537476, |
|
"learning_rate": 4.817887918032535e-05, |
|
"loss": 9.6202, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.5709145427286357, |
|
"grad_norm": 0.26481005549430847, |
|
"learning_rate": 4.7964741844974275e-05, |
|
"loss": 9.6097, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.5721139430284857, |
|
"grad_norm": 0.2751154601573944, |
|
"learning_rate": 4.775064189576381e-05, |
|
"loss": 9.6077, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.5733133433283358, |
|
"grad_norm": 0.26990050077438354, |
|
"learning_rate": 4.7536583265546775e-05, |
|
"loss": 9.609, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.5745127436281859, |
|
"grad_norm": 0.2816186845302582, |
|
"learning_rate": 4.7322569886417006e-05, |
|
"loss": 9.6101, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.5757121439280359, |
|
"grad_norm": 0.2793320417404175, |
|
"learning_rate": 4.71086056896371e-05, |
|
"loss": 9.6206, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.576911544227886, |
|
"grad_norm": 0.2865123748779297, |
|
"learning_rate": 4.689469460556626e-05, |
|
"loss": 9.6109, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.5781109445277361, |
|
"grad_norm": 0.2744526267051697, |
|
"learning_rate": 4.6680840563587966e-05, |
|
"loss": 9.6222, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.5793103448275863, |
|
"grad_norm": 0.30048128962516785, |
|
"learning_rate": 4.646704749203793e-05, |
|
"loss": 9.6182, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.5805097451274362, |
|
"grad_norm": 0.29160621762275696, |
|
"learning_rate": 4.6253319318131926e-05, |
|
"loss": 9.618, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.5817091454272864, |
|
"grad_norm": 0.31267857551574707, |
|
"learning_rate": 4.60396599678935e-05, |
|
"loss": 9.622, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.5829085457271365, |
|
"grad_norm": 0.3598839044570923, |
|
"learning_rate": 4.582607336608205e-05, |
|
"loss": 9.6176, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.5841079460269865, |
|
"grad_norm": 0.33458805084228516, |
|
"learning_rate": 4.561256343612061e-05, |
|
"loss": 9.6256, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.5853073463268366, |
|
"grad_norm": 0.27461162209510803, |
|
"learning_rate": 4.539913410002378e-05, |
|
"loss": 9.6119, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.5865067466266867, |
|
"grad_norm": 0.2723887264728546, |
|
"learning_rate": 4.518578927832577e-05, |
|
"loss": 9.6056, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.5877061469265368, |
|
"grad_norm": 0.2768537998199463, |
|
"learning_rate": 4.4972532890008313e-05, |
|
"loss": 9.6079, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.5889055472263868, |
|
"grad_norm": 0.2774599492549896, |
|
"learning_rate": 4.4759368852428625e-05, |
|
"loss": 9.6092, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.5901049475262369, |
|
"grad_norm": 0.27346640825271606, |
|
"learning_rate": 4.45463010812476e-05, |
|
"loss": 9.6143, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.591304347826087, |
|
"grad_norm": 0.2797171175479889, |
|
"learning_rate": 4.433333349035773e-05, |
|
"loss": 9.6168, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.592503748125937, |
|
"grad_norm": 0.2800818085670471, |
|
"learning_rate": 4.4120469991811296e-05, |
|
"loss": 9.6165, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.5937031484257871, |
|
"grad_norm": 0.280519038438797, |
|
"learning_rate": 4.390771449574846e-05, |
|
"loss": 9.6195, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.5949025487256372, |
|
"grad_norm": 0.2884778678417206, |
|
"learning_rate": 4.369507091032551e-05, |
|
"loss": 9.6132, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.5961019490254873, |
|
"grad_norm": 0.2894138693809509, |
|
"learning_rate": 4.3482543141642943e-05, |
|
"loss": 9.6147, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.5973013493253373, |
|
"grad_norm": 0.2868705093860626, |
|
"learning_rate": 4.327013509367386e-05, |
|
"loss": 9.6242, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.5985007496251874, |
|
"grad_norm": 0.2994021773338318, |
|
"learning_rate": 4.305785066819218e-05, |
|
"loss": 9.6189, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.5997001499250375, |
|
"grad_norm": 0.3168644607067108, |
|
"learning_rate": 4.2845693764700914e-05, |
|
"loss": 9.6247, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.6008995502248875, |
|
"grad_norm": 0.26666632294654846, |
|
"learning_rate": 4.263366828036065e-05, |
|
"loss": 9.6057, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.6020989505247376, |
|
"grad_norm": 0.26327091455459595, |
|
"learning_rate": 4.242177810991789e-05, |
|
"loss": 9.6115, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.6032983508245877, |
|
"grad_norm": 0.27538183331489563, |
|
"learning_rate": 4.221002714563347e-05, |
|
"loss": 9.6082, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.6044977511244378, |
|
"grad_norm": 0.27597832679748535, |
|
"learning_rate": 4.19984192772112e-05, |
|
"loss": 9.6075, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.6056971514242878, |
|
"grad_norm": 0.28365880250930786, |
|
"learning_rate": 4.1786958391726314e-05, |
|
"loss": 9.6136, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.6068965517241379, |
|
"grad_norm": 0.2802659273147583, |
|
"learning_rate": 4.1575648373554e-05, |
|
"loss": 9.6158, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.608095952023988, |
|
"grad_norm": 0.2841864228248596, |
|
"learning_rate": 4.136449310429822e-05, |
|
"loss": 9.6115, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.6092953523238381, |
|
"grad_norm": 0.2928536832332611, |
|
"learning_rate": 4.115349646272029e-05, |
|
"loss": 9.6156, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.6104947526236881, |
|
"grad_norm": 0.2854699492454529, |
|
"learning_rate": 4.0942662324667627e-05, |
|
"loss": 9.6137, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.6116941529235382, |
|
"grad_norm": 0.29192522168159485, |
|
"learning_rate": 4.0731994563002606e-05, |
|
"loss": 9.6136, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.6128935532233883, |
|
"grad_norm": 0.3441016674041748, |
|
"learning_rate": 4.052149704753142e-05, |
|
"loss": 9.6224, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.6140929535232383, |
|
"grad_norm": 0.3597991466522217, |
|
"learning_rate": 4.03111736449329e-05, |
|
"loss": 9.6219, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.6152923538230884, |
|
"grad_norm": 0.2781412899494171, |
|
"learning_rate": 4.010102821868762e-05, |
|
"loss": 9.6056, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.6164917541229386, |
|
"grad_norm": 0.27280357480049133, |
|
"learning_rate": 3.989106462900686e-05, |
|
"loss": 9.6063, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.6176911544227887, |
|
"grad_norm": 0.27366748452186584, |
|
"learning_rate": 3.968128673276165e-05, |
|
"loss": 9.6104, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.6188905547226387, |
|
"grad_norm": 0.27588704228401184, |
|
"learning_rate": 3.947169838341202e-05, |
|
"loss": 9.605, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.6200899550224888, |
|
"grad_norm": 0.27753859758377075, |
|
"learning_rate": 3.9262303430936164e-05, |
|
"loss": 9.6033, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.6212893553223389, |
|
"grad_norm": 0.27255064249038696, |
|
"learning_rate": 3.9053105721759696e-05, |
|
"loss": 9.6098, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.6224887556221889, |
|
"grad_norm": 0.2782951295375824, |
|
"learning_rate": 3.8844109098685045e-05, |
|
"loss": 9.6184, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.623688155922039, |
|
"grad_norm": 0.28660768270492554, |
|
"learning_rate": 3.8635317400820855e-05, |
|
"loss": 9.6113, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.6248875562218891, |
|
"grad_norm": 0.28494128584861755, |
|
"learning_rate": 3.842673446351138e-05, |
|
"loss": 9.6105, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.6260869565217392, |
|
"grad_norm": 0.28198301792144775, |
|
"learning_rate": 3.82183641182662e-05, |
|
"loss": 9.626, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.6272863568215892, |
|
"grad_norm": 0.2875995337963104, |
|
"learning_rate": 3.801021019268969e-05, |
|
"loss": 9.6176, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.6284857571214393, |
|
"grad_norm": 0.2956449091434479, |
|
"learning_rate": 3.780227651041073e-05, |
|
"loss": 9.6229, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.6296851574212894, |
|
"grad_norm": 0.37847524881362915, |
|
"learning_rate": 3.7594566891012546e-05, |
|
"loss": 9.6214, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.6308845577211394, |
|
"grad_norm": 0.27030348777770996, |
|
"learning_rate": 3.7387085149962507e-05, |
|
"loss": 9.6011, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.6320839580209895, |
|
"grad_norm": 0.274962455034256, |
|
"learning_rate": 3.717983509854198e-05, |
|
"loss": 9.6023, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.6332833583208396, |
|
"grad_norm": 0.27726373076438904, |
|
"learning_rate": 3.69728205437764e-05, |
|
"loss": 9.6102, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.6344827586206897, |
|
"grad_norm": 0.27569401264190674, |
|
"learning_rate": 3.676604528836535e-05, |
|
"loss": 9.6077, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.6356821589205397, |
|
"grad_norm": 0.2719118893146515, |
|
"learning_rate": 3.6559513130612565e-05, |
|
"loss": 9.6078, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.6368815592203898, |
|
"grad_norm": 0.27930060029029846, |
|
"learning_rate": 3.635322786435635e-05, |
|
"loss": 9.6099, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.6380809595202399, |
|
"grad_norm": 0.2761722505092621, |
|
"learning_rate": 3.614719327889978e-05, |
|
"loss": 9.6161, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.6392803598200899, |
|
"grad_norm": 0.2825543284416199, |
|
"learning_rate": 3.594141315894108e-05, |
|
"loss": 9.616, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.64047976011994, |
|
"grad_norm": 0.28519946336746216, |
|
"learning_rate": 3.573589128450418e-05, |
|
"loss": 9.6134, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.6416791604197901, |
|
"grad_norm": 0.2859567105770111, |
|
"learning_rate": 3.5530631430869234e-05, |
|
"loss": 9.6181, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.6428785607196402, |
|
"grad_norm": 0.293560653924942, |
|
"learning_rate": 3.532563736850322e-05, |
|
"loss": 9.6141, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.6440779610194902, |
|
"grad_norm": 0.31543228030204773, |
|
"learning_rate": 3.512091286299081e-05, |
|
"loss": 9.6132, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.6452773613193403, |
|
"grad_norm": 0.28361520171165466, |
|
"learning_rate": 3.491646167496507e-05, |
|
"loss": 9.5993, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.6464767616191904, |
|
"grad_norm": 0.2670563757419586, |
|
"learning_rate": 3.4712287560038446e-05, |
|
"loss": 9.6042, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.6476761619190404, |
|
"grad_norm": 0.2657446265220642, |
|
"learning_rate": 3.450839426873378e-05, |
|
"loss": 9.6106, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.6488755622188905, |
|
"grad_norm": 0.271816611289978, |
|
"learning_rate": 3.4304785546415374e-05, |
|
"loss": 9.608, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.6500749625187406, |
|
"grad_norm": 0.27191296219825745, |
|
"learning_rate": 3.41014651332202e-05, |
|
"loss": 9.6103, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.6512743628185907, |
|
"grad_norm": 0.27644070982933044, |
|
"learning_rate": 3.3898436763989247e-05, |
|
"loss": 9.6039, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.6524737631184407, |
|
"grad_norm": 0.27742430567741394, |
|
"learning_rate": 3.369570416819889e-05, |
|
"loss": 9.6053, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.6536731634182908, |
|
"grad_norm": 0.2793113589286804, |
|
"learning_rate": 3.349327106989232e-05, |
|
"loss": 9.615, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.654872563718141, |
|
"grad_norm": 0.28077057003974915, |
|
"learning_rate": 3.329114118761123e-05, |
|
"loss": 9.6101, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.656071964017991, |
|
"grad_norm": 0.2894865870475769, |
|
"learning_rate": 3.308931823432744e-05, |
|
"loss": 9.6093, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.6572713643178411, |
|
"grad_norm": 0.2894723415374756, |
|
"learning_rate": 3.288780591737474e-05, |
|
"loss": 9.6141, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.6584707646176912, |
|
"grad_norm": 0.3010658323764801, |
|
"learning_rate": 3.268660793838074e-05, |
|
"loss": 9.6249, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.6596701649175413, |
|
"grad_norm": 0.3542385399341583, |
|
"learning_rate": 3.2485727993198945e-05, |
|
"loss": 9.6182, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.6608695652173913, |
|
"grad_norm": 0.2821604907512665, |
|
"learning_rate": 3.228516977184075e-05, |
|
"loss": 9.6229, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.6620689655172414, |
|
"grad_norm": 0.27113085985183716, |
|
"learning_rate": 3.2084936958407805e-05, |
|
"loss": 9.6041, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.6632683658170915, |
|
"grad_norm": 0.26982516050338745, |
|
"learning_rate": 3.188503323102425e-05, |
|
"loss": 9.6084, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.6644677661169416, |
|
"grad_norm": 0.2756569981575012, |
|
"learning_rate": 3.1685462261769105e-05, |
|
"loss": 9.6126, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.6656671664167916, |
|
"grad_norm": 0.27629488706588745, |
|
"learning_rate": 3.1486227716608946e-05, |
|
"loss": 9.6056, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.6668665667166417, |
|
"grad_norm": 0.28036460280418396, |
|
"learning_rate": 3.128733325533047e-05, |
|
"loss": 9.6054, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.6680659670164918, |
|
"grad_norm": 0.27844056487083435, |
|
"learning_rate": 3.1088782531473266e-05, |
|
"loss": 9.6111, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.6692653673163418, |
|
"grad_norm": 0.2862386405467987, |
|
"learning_rate": 3.089057919226277e-05, |
|
"loss": 9.612, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.6704647676161919, |
|
"grad_norm": 0.2859496474266052, |
|
"learning_rate": 3.069272687854322e-05, |
|
"loss": 9.6114, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.671664167916042, |
|
"grad_norm": 0.28554123640060425, |
|
"learning_rate": 3.049522922471075e-05, |
|
"loss": 9.6105, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.6728635682158921, |
|
"grad_norm": 0.30089861154556274, |
|
"learning_rate": 3.02980898586467e-05, |
|
"loss": 9.6205, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.6740629685157421, |
|
"grad_norm": 0.30331140756607056, |
|
"learning_rate": 3.0101312401650937e-05, |
|
"loss": 9.6158, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.6752623688155922, |
|
"grad_norm": 0.2732248902320862, |
|
"learning_rate": 2.9904900468375297e-05, |
|
"loss": 9.6064, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.6764617691154423, |
|
"grad_norm": 0.27510005235671997, |
|
"learning_rate": 2.9708857666757246e-05, |
|
"loss": 9.6019, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.6776611694152923, |
|
"grad_norm": 0.27365824580192566, |
|
"learning_rate": 2.9513187597953607e-05, |
|
"loss": 9.5995, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.6788605697151424, |
|
"grad_norm": 0.2792357802391052, |
|
"learning_rate": 2.931789385627433e-05, |
|
"loss": 9.606, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.6800599700149925, |
|
"grad_norm": 0.2759556770324707, |
|
"learning_rate": 2.9122980029116586e-05, |
|
"loss": 9.6039, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.6812593703148426, |
|
"grad_norm": 0.2814030647277832, |
|
"learning_rate": 2.8928449696898763e-05, |
|
"loss": 9.602, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.6824587706146926, |
|
"grad_norm": 0.2769099771976471, |
|
"learning_rate": 2.8734306432994735e-05, |
|
"loss": 9.6079, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.6836581709145427, |
|
"grad_norm": 0.2809275686740875, |
|
"learning_rate": 2.8540553803668252e-05, |
|
"loss": 9.613, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.6848575712143928, |
|
"grad_norm": 0.275016725063324, |
|
"learning_rate": 2.8347195368007418e-05, |
|
"loss": 9.6097, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.6860569715142428, |
|
"grad_norm": 0.2964610755443573, |
|
"learning_rate": 2.815423467785925e-05, |
|
"loss": 9.6111, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.6872563718140929, |
|
"grad_norm": 0.2884480059146881, |
|
"learning_rate": 2.7961675277764498e-05, |
|
"loss": 9.6089, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.688455772113943, |
|
"grad_norm": 0.30310893058776855, |
|
"learning_rate": 2.7769520704892566e-05, |
|
"loss": 9.6102, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.6896551724137931, |
|
"grad_norm": 0.4733683466911316, |
|
"learning_rate": 2.757777448897646e-05, |
|
"loss": 9.6083, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.6908545727136431, |
|
"grad_norm": 0.272512823343277, |
|
"learning_rate": 2.7386440152247933e-05, |
|
"loss": 9.5963, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.6920539730134933, |
|
"grad_norm": 0.2810138165950775, |
|
"learning_rate": 2.71955212093729e-05, |
|
"loss": 9.6012, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.6932533733133434, |
|
"grad_norm": 0.2755623161792755, |
|
"learning_rate": 2.7005021167386803e-05, |
|
"loss": 9.6022, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.6944527736131934, |
|
"grad_norm": 0.2718299329280853, |
|
"learning_rate": 2.681494352563013e-05, |
|
"loss": 9.6096, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.6956521739130435, |
|
"grad_norm": 0.2746315896511078, |
|
"learning_rate": 2.6625291775684292e-05, |
|
"loss": 9.6124, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.6968515742128936, |
|
"grad_norm": 0.2844776511192322, |
|
"learning_rate": 2.6436069401307284e-05, |
|
"loss": 9.6054, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.6980509745127437, |
|
"grad_norm": 0.2785060703754425, |
|
"learning_rate": 2.624727987836991e-05, |
|
"loss": 9.6112, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.6992503748125937, |
|
"grad_norm": 0.2840147316455841, |
|
"learning_rate": 2.6058926674791728e-05, |
|
"loss": 9.6061, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.7004497751124438, |
|
"grad_norm": 0.28523436188697815, |
|
"learning_rate": 2.5871013250477528e-05, |
|
"loss": 9.6057, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.7016491754122939, |
|
"grad_norm": 0.29284006357192993, |
|
"learning_rate": 2.56835430572536e-05, |
|
"loss": 9.6091, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.7028485757121439, |
|
"grad_norm": 0.29574641585350037, |
|
"learning_rate": 2.5496519538804486e-05, |
|
"loss": 9.6155, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.704047976011994, |
|
"grad_norm": 0.3032572269439697, |
|
"learning_rate": 2.530994613060965e-05, |
|
"loss": 9.6162, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.7052473763118441, |
|
"grad_norm": 0.2718828320503235, |
|
"learning_rate": 2.5123826259880323e-05, |
|
"loss": 9.6001, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.7064467766116942, |
|
"grad_norm": 0.27074381709098816, |
|
"learning_rate": 2.493816334549664e-05, |
|
"loss": 9.6014, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.7076461769115442, |
|
"grad_norm": 0.2791549265384674, |
|
"learning_rate": 2.4752960797944802e-05, |
|
"loss": 9.5998, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.7088455772113943, |
|
"grad_norm": 0.28340011835098267, |
|
"learning_rate": 2.4568222019254377e-05, |
|
"loss": 9.5979, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.7100449775112444, |
|
"grad_norm": 0.2762751579284668, |
|
"learning_rate": 2.43839504029359e-05, |
|
"loss": 9.6032, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.7112443778110945, |
|
"grad_norm": 0.2753763198852539, |
|
"learning_rate": 2.4200149333918487e-05, |
|
"loss": 9.6089, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.7124437781109445, |
|
"grad_norm": 0.27482444047927856, |
|
"learning_rate": 2.4016822188487603e-05, |
|
"loss": 9.6081, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.7136431784107946, |
|
"grad_norm": 0.28210797905921936, |
|
"learning_rate": 2.383397233422318e-05, |
|
"loss": 9.6041, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.7148425787106447, |
|
"grad_norm": 0.2853706479072571, |
|
"learning_rate": 2.3651603129937592e-05, |
|
"loss": 9.6042, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.7160419790104947, |
|
"grad_norm": 0.3066234886646271, |
|
"learning_rate": 2.346971792561413e-05, |
|
"loss": 9.6053, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.7172413793103448, |
|
"grad_norm": 0.2879929542541504, |
|
"learning_rate": 2.3288320062345277e-05, |
|
"loss": 9.6069, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.7184407796101949, |
|
"grad_norm": 0.35332369804382324, |
|
"learning_rate": 2.3107412872271518e-05, |
|
"loss": 9.6162, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.719640179910045, |
|
"grad_norm": 0.5152252316474915, |
|
"learning_rate": 2.2926999678519974e-05, |
|
"loss": 9.6182, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.720839580209895, |
|
"grad_norm": 0.2663346230983734, |
|
"learning_rate": 2.274708379514348e-05, |
|
"loss": 9.5986, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.7220389805097451, |
|
"grad_norm": 0.27524423599243164, |
|
"learning_rate": 2.256766852705967e-05, |
|
"loss": 9.5986, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.7232383808095952, |
|
"grad_norm": 0.2814219295978546, |
|
"learning_rate": 2.238875716999019e-05, |
|
"loss": 9.6037, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.7244377811094452, |
|
"grad_norm": 0.2859136760234833, |
|
"learning_rate": 2.221035301040027e-05, |
|
"loss": 9.6002, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.7256371814092953, |
|
"grad_norm": 0.27460747957229614, |
|
"learning_rate": 2.2032459325438336e-05, |
|
"loss": 9.6031, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.7268365817091454, |
|
"grad_norm": 0.2745445966720581, |
|
"learning_rate": 2.185507938287572e-05, |
|
"loss": 9.6072, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.7280359820089956, |
|
"grad_norm": 0.2816024124622345, |
|
"learning_rate": 2.1678216441046734e-05, |
|
"loss": 9.6128, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.7292353823088455, |
|
"grad_norm": 0.28734058141708374, |
|
"learning_rate": 2.1501873748788802e-05, |
|
"loss": 9.6127, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.7304347826086957, |
|
"grad_norm": 0.28445249795913696, |
|
"learning_rate": 2.1326054545382695e-05, |
|
"loss": 9.6118, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.7316341829085458, |
|
"grad_norm": 0.2825443148612976, |
|
"learning_rate": 2.1150762060493155e-05, |
|
"loss": 9.6182, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.7328335832083958, |
|
"grad_norm": 0.29409319162368774, |
|
"learning_rate": 2.09759995141095e-05, |
|
"loss": 9.611, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.7340329835082459, |
|
"grad_norm": 0.30348506569862366, |
|
"learning_rate": 2.0801770116486447e-05, |
|
"loss": 9.6193, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.735232383808096, |
|
"grad_norm": 0.2586905360221863, |
|
"learning_rate": 2.0628077068085173e-05, |
|
"loss": 9.6146, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.7364317841079461, |
|
"grad_norm": 0.27243587374687195, |
|
"learning_rate": 2.0454923559514595e-05, |
|
"loss": 9.6025, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.7376311844077961, |
|
"grad_norm": 0.27491042017936707, |
|
"learning_rate": 2.028231277147261e-05, |
|
"loss": 9.6013, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.7388305847076462, |
|
"grad_norm": 0.279153048992157, |
|
"learning_rate": 2.0110247874687815e-05, |
|
"loss": 9.5937, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.7400299850074963, |
|
"grad_norm": 0.27780649065971375, |
|
"learning_rate": 1.993873202986119e-05, |
|
"loss": 9.6022, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.7412293853073463, |
|
"grad_norm": 0.2798539698123932, |
|
"learning_rate": 1.976776838760801e-05, |
|
"loss": 9.6022, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.7424287856071964, |
|
"grad_norm": 0.27843162417411804, |
|
"learning_rate": 1.9597360088400052e-05, |
|
"loss": 9.6062, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.7436281859070465, |
|
"grad_norm": 0.27371302247047424, |
|
"learning_rate": 1.9427510262507864e-05, |
|
"loss": 9.6119, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.7448275862068966, |
|
"grad_norm": 0.2873663604259491, |
|
"learning_rate": 1.925822202994323e-05, |
|
"loss": 9.6004, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.7460269865067466, |
|
"grad_norm": 0.2875591218471527, |
|
"learning_rate": 1.9089498500401914e-05, |
|
"loss": 9.6119, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.7472263868065967, |
|
"grad_norm": 0.2853778004646301, |
|
"learning_rate": 1.892134277320655e-05, |
|
"loss": 9.6091, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.7484257871064468, |
|
"grad_norm": 0.2952004075050354, |
|
"learning_rate": 1.87537579372496e-05, |
|
"loss": 9.6182, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.7496251874062968, |
|
"grad_norm": 0.3686712980270386, |
|
"learning_rate": 1.858674707093675e-05, |
|
"loss": 9.614, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.7508245877061469, |
|
"grad_norm": 0.2664184868335724, |
|
"learning_rate": 1.8420313242130293e-05, |
|
"loss": 9.6005, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.752023988005997, |
|
"grad_norm": 0.2688407301902771, |
|
"learning_rate": 1.8254459508092768e-05, |
|
"loss": 9.5988, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.7532233883058471, |
|
"grad_norm": 0.2794104516506195, |
|
"learning_rate": 1.8089188915430793e-05, |
|
"loss": 9.5987, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.7544227886056971, |
|
"grad_norm": 0.26486334204673767, |
|
"learning_rate": 1.792450450003919e-05, |
|
"loss": 9.6129, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.7556221889055472, |
|
"grad_norm": 0.2762359082698822, |
|
"learning_rate": 1.7760409287045078e-05, |
|
"loss": 9.6052, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.7568215892053973, |
|
"grad_norm": 0.27764591574668884, |
|
"learning_rate": 1.7596906290752425e-05, |
|
"loss": 9.6056, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.7580209895052473, |
|
"grad_norm": 0.276153028011322, |
|
"learning_rate": 1.743399851458663e-05, |
|
"loss": 9.609, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.7592203898050974, |
|
"grad_norm": 0.2780199646949768, |
|
"learning_rate": 1.727168895103931e-05, |
|
"loss": 9.6081, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.7604197901049475, |
|
"grad_norm": 0.276457816362381, |
|
"learning_rate": 1.7109980581613417e-05, |
|
"loss": 9.6062, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.7616191904047976, |
|
"grad_norm": 0.2808220088481903, |
|
"learning_rate": 1.6948876376768418e-05, |
|
"loss": 9.6123, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.7628185907046476, |
|
"grad_norm": 0.29566583037376404, |
|
"learning_rate": 1.6788379295865704e-05, |
|
"loss": 9.6094, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.7640179910044977, |
|
"grad_norm": 0.33136534690856934, |
|
"learning_rate": 1.6628492287114296e-05, |
|
"loss": 9.614, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.7652173913043478, |
|
"grad_norm": 0.27251994609832764, |
|
"learning_rate": 1.6469218287516664e-05, |
|
"loss": 9.6011, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.766416791604198, |
|
"grad_norm": 0.2670121490955353, |
|
"learning_rate": 1.6310560222814714e-05, |
|
"loss": 9.6037, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.767616191904048, |
|
"grad_norm": 0.2792399227619171, |
|
"learning_rate": 1.6152521007436145e-05, |
|
"loss": 9.6036, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.7688155922038981, |
|
"grad_norm": 0.275511234998703, |
|
"learning_rate": 1.599510354444087e-05, |
|
"loss": 9.5973, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.7700149925037482, |
|
"grad_norm": 0.2751782536506653, |
|
"learning_rate": 1.5838310725467644e-05, |
|
"loss": 9.6005, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.7712143928035982, |
|
"grad_norm": 0.28111734986305237, |
|
"learning_rate": 1.5682145430681027e-05, |
|
"loss": 9.6015, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.7724137931034483, |
|
"grad_norm": 0.2826797068119049, |
|
"learning_rate": 1.5526610528718415e-05, |
|
"loss": 9.6054, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.7736131934032984, |
|
"grad_norm": 0.28505128622055054, |
|
"learning_rate": 1.5371708876637354e-05, |
|
"loss": 9.6042, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.7748125937031485, |
|
"grad_norm": 0.28200674057006836, |
|
"learning_rate": 1.5217443319863112e-05, |
|
"loss": 9.6051, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.7760119940029985, |
|
"grad_norm": 0.2859637439250946, |
|
"learning_rate": 1.5063816692136373e-05, |
|
"loss": 9.6004, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.7772113943028486, |
|
"grad_norm": 0.28504401445388794, |
|
"learning_rate": 1.4910831815461123e-05, |
|
"loss": 9.6177, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.7784107946026987, |
|
"grad_norm": 0.2949487268924713, |
|
"learning_rate": 1.4758491500052924e-05, |
|
"loss": 9.6204, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.7796101949025487, |
|
"grad_norm": 0.3952041268348694, |
|
"learning_rate": 1.4606798544287243e-05, |
|
"loss": 9.62, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.7808095952023988, |
|
"grad_norm": 0.2684868574142456, |
|
"learning_rate": 1.445575573464799e-05, |
|
"loss": 9.5986, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.7820089955022489, |
|
"grad_norm": 0.2751760184764862, |
|
"learning_rate": 1.4305365845676439e-05, |
|
"loss": 9.5993, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.783208395802099, |
|
"grad_norm": 0.27565452456474304, |
|
"learning_rate": 1.4155631639920209e-05, |
|
"loss": 9.5939, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.784407796101949, |
|
"grad_norm": 0.27967387437820435, |
|
"learning_rate": 1.4006555867882464e-05, |
|
"loss": 9.6024, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.7856071964017991, |
|
"grad_norm": 0.28178393840789795, |
|
"learning_rate": 1.3858141267971491e-05, |
|
"loss": 9.6057, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.7868065967016492, |
|
"grad_norm": 0.27983683347702026, |
|
"learning_rate": 1.3710390566450366e-05, |
|
"loss": 9.6059, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.7880059970014992, |
|
"grad_norm": 0.286726713180542, |
|
"learning_rate": 1.3563306477386784e-05, |
|
"loss": 9.6032, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.7892053973013493, |
|
"grad_norm": 0.2814926505088806, |
|
"learning_rate": 1.3416891702603358e-05, |
|
"loss": 9.6077, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.7904047976011994, |
|
"grad_norm": 0.291660875082016, |
|
"learning_rate": 1.3271148931627858e-05, |
|
"loss": 9.6055, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.7916041979010495, |
|
"grad_norm": 0.2863795757293701, |
|
"learning_rate": 1.3126080841643856e-05, |
|
"loss": 9.6111, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.7928035982008995, |
|
"grad_norm": 0.2854698896408081, |
|
"learning_rate": 1.2981690097441573e-05, |
|
"loss": 9.6172, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.7940029985007496, |
|
"grad_norm": 0.3119170367717743, |
|
"learning_rate": 1.2837979351368912e-05, |
|
"loss": 9.6102, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.7952023988005997, |
|
"grad_norm": 0.27526015043258667, |
|
"learning_rate": 1.2694951243282683e-05, |
|
"loss": 9.6006, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.7964017991004497, |
|
"grad_norm": 0.27086350321769714, |
|
"learning_rate": 1.2552608400500199e-05, |
|
"loss": 9.6, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.7976011994002998, |
|
"grad_norm": 0.2674426734447479, |
|
"learning_rate": 1.2410953437750966e-05, |
|
"loss": 9.599, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.7988005997001499, |
|
"grad_norm": 0.26960834860801697, |
|
"learning_rate": 1.2269988957128636e-05, |
|
"loss": 9.6059, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.27745890617370605, |
|
"learning_rate": 1.212971754804324e-05, |
|
"loss": 9.6046, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.80119940029985, |
|
"grad_norm": 0.2803892493247986, |
|
"learning_rate": 1.1990141787173648e-05, |
|
"loss": 9.6036, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.8023988005997001, |
|
"grad_norm": 0.2826705574989319, |
|
"learning_rate": 1.1851264238420135e-05, |
|
"loss": 9.6031, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.8035982008995503, |
|
"grad_norm": 0.28543218970298767, |
|
"learning_rate": 1.1713087452857408e-05, |
|
"loss": 9.6047, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.8047976011994002, |
|
"grad_norm": 0.2749161124229431, |
|
"learning_rate": 1.1575613968687682e-05, |
|
"loss": 9.6061, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.8059970014992504, |
|
"grad_norm": 0.2880239486694336, |
|
"learning_rate": 1.1438846311194024e-05, |
|
"loss": 9.607, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.8071964017991005, |
|
"grad_norm": 0.2794909179210663, |
|
"learning_rate": 1.1302786992694048e-05, |
|
"loss": 9.6098, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.8083958020989506, |
|
"grad_norm": 0.3027547299861908, |
|
"learning_rate": 1.1167438512493683e-05, |
|
"loss": 9.6116, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.8095952023988006, |
|
"grad_norm": 0.34445720911026, |
|
"learning_rate": 1.1032803356841342e-05, |
|
"loss": 9.6171, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.8107946026986507, |
|
"grad_norm": 0.2722347378730774, |
|
"learning_rate": 1.0898883998882158e-05, |
|
"loss": 9.601, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.8119940029985008, |
|
"grad_norm": 0.27299922704696655, |
|
"learning_rate": 1.0765682898612656e-05, |
|
"loss": 9.5976, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.8131934032983508, |
|
"grad_norm": 0.2737182080745697, |
|
"learning_rate": 1.0633202502835494e-05, |
|
"loss": 9.5965, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.8143928035982009, |
|
"grad_norm": 0.2752780020236969, |
|
"learning_rate": 1.0501445245114522e-05, |
|
"loss": 9.6009, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.815592203898051, |
|
"grad_norm": 0.2721465826034546, |
|
"learning_rate": 1.0370413545730118e-05, |
|
"loss": 9.6064, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.8167916041979011, |
|
"grad_norm": 0.2846396267414093, |
|
"learning_rate": 1.0240109811634712e-05, |
|
"loss": 9.5995, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.8179910044977511, |
|
"grad_norm": 0.28411293029785156, |
|
"learning_rate": 1.0110536436408535e-05, |
|
"loss": 9.5975, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.8191904047976012, |
|
"grad_norm": 0.2815098762512207, |
|
"learning_rate": 9.9816958002157e-06, |
|
"loss": 9.6078, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.8203898050974513, |
|
"grad_norm": 0.278131902217865, |
|
"learning_rate": 9.853590269760493e-06, |
|
"loss": 9.6143, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.8215892053973014, |
|
"grad_norm": 0.2930939197540283, |
|
"learning_rate": 9.726222198243806e-06, |
|
"loss": 9.6042, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.8227886056971514, |
|
"grad_norm": 0.2876308560371399, |
|
"learning_rate": 9.599593925320016e-06, |
|
"loss": 9.6187, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.8239880059970015, |
|
"grad_norm": 0.3398456573486328, |
|
"learning_rate": 9.47370777705397e-06, |
|
"loss": 9.6115, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.8251874062968516, |
|
"grad_norm": 0.28324592113494873, |
|
"learning_rate": 9.348566065878217e-06, |
|
"loss": 9.5972, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.8263868065967016, |
|
"grad_norm": 0.271178662776947, |
|
"learning_rate": 9.224171090550571e-06, |
|
"loss": 9.6004, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.8275862068965517, |
|
"grad_norm": 0.26743438839912415, |
|
"learning_rate": 9.100525136111915e-06, |
|
"loss": 9.604, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.8287856071964018, |
|
"grad_norm": 0.2741158604621887, |
|
"learning_rate": 8.97763047384414e-06, |
|
"loss": 9.6024, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.8299850074962519, |
|
"grad_norm": 0.2776412069797516, |
|
"learning_rate": 8.855489361228496e-06, |
|
"loss": 9.5996, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.8311844077961019, |
|
"grad_norm": 0.2762274742126465, |
|
"learning_rate": 8.734104041904129e-06, |
|
"loss": 9.6041, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.832383808095952, |
|
"grad_norm": 0.2758176624774933, |
|
"learning_rate": 8.61347674562677e-06, |
|
"loss": 9.6084, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.8335832083958021, |
|
"grad_norm": 0.28230342268943787, |
|
"learning_rate": 8.4936096882279e-06, |
|
"loss": 9.6047, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.8347826086956521, |
|
"grad_norm": 0.28801631927490234, |
|
"learning_rate": 8.37450507157399e-06, |
|
"loss": 9.6084, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.8359820089955022, |
|
"grad_norm": 0.289760559797287, |
|
"learning_rate": 8.256165083526019e-06, |
|
"loss": 9.6033, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.8371814092953523, |
|
"grad_norm": 0.29011571407318115, |
|
"learning_rate": 8.138591897899345e-06, |
|
"loss": 9.6161, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.8383808095952024, |
|
"grad_norm": 0.3083633780479431, |
|
"learning_rate": 8.021787674423775e-06, |
|
"loss": 9.6152, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.8395802098950524, |
|
"grad_norm": 0.36470380425453186, |
|
"learning_rate": 7.905754558703803e-06, |
|
"loss": 9.6132, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.8407796101949025, |
|
"grad_norm": 0.26850220561027527, |
|
"learning_rate": 7.790494682179317e-06, |
|
"loss": 9.5949, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.8419790104947527, |
|
"grad_norm": 0.2714633643627167, |
|
"learning_rate": 7.676010162086388e-06, |
|
"loss": 9.604, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.8431784107946027, |
|
"grad_norm": 0.2753824293613434, |
|
"learning_rate": 7.56230310141835e-06, |
|
"loss": 9.5993, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.8443778110944528, |
|
"grad_norm": 0.2757047116756439, |
|
"learning_rate": 7.449375588887203e-06, |
|
"loss": 9.5993, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.8455772113943029, |
|
"grad_norm": 0.27331098914146423, |
|
"learning_rate": 7.337229698885279e-06, |
|
"loss": 9.6088, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.846776611694153, |
|
"grad_norm": 0.2818980813026428, |
|
"learning_rate": 7.225867491447053e-06, |
|
"loss": 9.6, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.847976011994003, |
|
"grad_norm": 0.2784759998321533, |
|
"learning_rate": 7.115291012211383e-06, |
|
"loss": 9.6056, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.8491754122938531, |
|
"grad_norm": 0.2809768319129944, |
|
"learning_rate": 7.005502292383898e-06, |
|
"loss": 9.6092, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.8503748125937032, |
|
"grad_norm": 0.29430076479911804, |
|
"learning_rate": 6.896503348699657e-06, |
|
"loss": 9.6031, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.8515742128935532, |
|
"grad_norm": 0.28350192308425903, |
|
"learning_rate": 6.788296183386162e-06, |
|
"loss": 9.6105, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.8527736131934033, |
|
"grad_norm": 0.29121461510658264, |
|
"learning_rate": 6.680882784126552e-06, |
|
"loss": 9.6108, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.8539730134932534, |
|
"grad_norm": 0.3215639889240265, |
|
"learning_rate": 6.5742651240230545e-06, |
|
"loss": 9.6104, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.8551724137931035, |
|
"grad_norm": 0.27074047923088074, |
|
"learning_rate": 6.46844516156081e-06, |
|
"loss": 9.598, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.8563718140929535, |
|
"grad_norm": 0.2728975713253021, |
|
"learning_rate": 6.363424840571869e-06, |
|
"loss": 9.5965, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.8575712143928036, |
|
"grad_norm": 0.2756417393684387, |
|
"learning_rate": 6.259206090199426e-06, |
|
"loss": 9.6021, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.8587706146926537, |
|
"grad_norm": 0.28334730863571167, |
|
"learning_rate": 6.155790824862484e-06, |
|
"loss": 9.5923, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.8599700149925037, |
|
"grad_norm": 0.2780725359916687, |
|
"learning_rate": 6.053180944220627e-06, |
|
"loss": 9.5977, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.8611694152923538, |
|
"grad_norm": 0.2788305878639221, |
|
"learning_rate": 5.951378333139118e-06, |
|
"loss": 9.6104, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.8623688155922039, |
|
"grad_norm": 0.28093886375427246, |
|
"learning_rate": 5.850384861654329e-06, |
|
"loss": 9.5993, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.863568215892054, |
|
"grad_norm": 0.28586381673812866, |
|
"learning_rate": 5.750202384939313e-06, |
|
"loss": 9.6017, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.864767616191904, |
|
"grad_norm": 0.2862658202648163, |
|
"learning_rate": 5.650832743269779e-06, |
|
"loss": 9.6105, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.8659670164917541, |
|
"grad_norm": 0.29242414236068726, |
|
"learning_rate": 5.552277761990294e-06, |
|
"loss": 9.6003, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.8671664167916042, |
|
"grad_norm": 0.2869936525821686, |
|
"learning_rate": 5.454539251480739e-06, |
|
"loss": 9.6101, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.8683658170914542, |
|
"grad_norm": 0.30616676807403564, |
|
"learning_rate": 5.3576190071230106e-06, |
|
"loss": 9.6093, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.8695652173913043, |
|
"grad_norm": 0.49281755089759827, |
|
"learning_rate": 5.2615188092681176e-06, |
|
"loss": 9.6174, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.8707646176911544, |
|
"grad_norm": 0.27237847447395325, |
|
"learning_rate": 5.166240423203428e-06, |
|
"loss": 9.5972, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.8719640179910045, |
|
"grad_norm": 0.26910293102264404, |
|
"learning_rate": 5.071785599120243e-06, |
|
"loss": 9.6002, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.8731634182908545, |
|
"grad_norm": 0.28134414553642273, |
|
"learning_rate": 4.978156072081669e-06, |
|
"loss": 9.5976, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.8743628185907046, |
|
"grad_norm": 0.2796195149421692, |
|
"learning_rate": 4.885353561990752e-06, |
|
"loss": 9.6045, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.8755622188905547, |
|
"grad_norm": 0.2702418863773346, |
|
"learning_rate": 4.793379773558815e-06, |
|
"loss": 9.611, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.8767616191904049, |
|
"grad_norm": 0.27752310037612915, |
|
"learning_rate": 4.7022363962742514e-06, |
|
"loss": 9.6131, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.8779610194902548, |
|
"grad_norm": 0.27505457401275635, |
|
"learning_rate": 4.6119251043714225e-06, |
|
"loss": 9.6062, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.879160419790105, |
|
"grad_norm": 0.2814129590988159, |
|
"learning_rate": 4.522447556799875e-06, |
|
"loss": 9.6059, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.8803598200899551, |
|
"grad_norm": 0.28014951944351196, |
|
"learning_rate": 4.433805397193969e-06, |
|
"loss": 9.6106, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.881559220389805, |
|
"grad_norm": 0.2873791456222534, |
|
"learning_rate": 4.3460002538425805e-06, |
|
"loss": 9.6109, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.8827586206896552, |
|
"grad_norm": 0.297184020280838, |
|
"learning_rate": 4.2590337396592406e-06, |
|
"loss": 9.614, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.8839580209895053, |
|
"grad_norm": 0.3112678527832031, |
|
"learning_rate": 4.172907452152519e-06, |
|
"loss": 9.6144, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.8851574212893554, |
|
"grad_norm": 0.27748996019363403, |
|
"learning_rate": 4.087622973396665e-06, |
|
"loss": 9.6036, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.8863568215892054, |
|
"grad_norm": 0.268793523311615, |
|
"learning_rate": 4.0031818700025095e-06, |
|
"loss": 9.6002, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.8875562218890555, |
|
"grad_norm": 0.27348071336746216, |
|
"learning_rate": 3.919585693088751e-06, |
|
"loss": 9.5986, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.8887556221889056, |
|
"grad_norm": 0.2737883925437927, |
|
"learning_rate": 3.836835978253433e-06, |
|
"loss": 9.5968, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.8899550224887556, |
|
"grad_norm": 0.2754174768924713, |
|
"learning_rate": 3.7549342455457216e-06, |
|
"loss": 9.6053, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.8911544227886057, |
|
"grad_norm": 0.27495238184928894, |
|
"learning_rate": 3.6738819994379945e-06, |
|
"loss": 9.607, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.8923538230884558, |
|
"grad_norm": 0.2809472680091858, |
|
"learning_rate": 3.593680728798238e-06, |
|
"loss": 9.6063, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.8935532233883059, |
|
"grad_norm": 0.2831871211528778, |
|
"learning_rate": 3.5143319068626225e-06, |
|
"loss": 9.6096, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.8947526236881559, |
|
"grad_norm": 0.28572776913642883, |
|
"learning_rate": 3.435836991208524e-06, |
|
"loss": 9.606, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.895952023988006, |
|
"grad_norm": 0.2837792634963989, |
|
"learning_rate": 3.35819742372771e-06, |
|
"loss": 9.608, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.8971514242878561, |
|
"grad_norm": 0.2817115783691406, |
|
"learning_rate": 3.2814146305998107e-06, |
|
"loss": 9.6116, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.8983508245877061, |
|
"grad_norm": 0.3011699914932251, |
|
"learning_rate": 3.2054900222662276e-06, |
|
"loss": 9.6132, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.8995502248875562, |
|
"grad_norm": 0.342312753200531, |
|
"learning_rate": 3.1304249934041017e-06, |
|
"loss": 9.61, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.9007496251874063, |
|
"grad_norm": 0.27040937542915344, |
|
"learning_rate": 3.0562209229008042e-06, |
|
"loss": 9.5958, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.9019490254872564, |
|
"grad_norm": 0.2634391188621521, |
|
"learning_rate": 2.982879173828523e-06, |
|
"loss": 9.6023, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.9031484257871064, |
|
"grad_norm": 0.275547057390213, |
|
"learning_rate": 2.9104010934192794e-06, |
|
"loss": 9.6, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.9043478260869565, |
|
"grad_norm": 0.2732800841331482, |
|
"learning_rate": 2.838788013040139e-06, |
|
"loss": 9.6007, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.9055472263868066, |
|
"grad_norm": 0.27795758843421936, |
|
"learning_rate": 2.768041248168801e-06, |
|
"loss": 9.6015, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.9067466266866566, |
|
"grad_norm": 0.2714845836162567, |
|
"learning_rate": 2.6981620983694057e-06, |
|
"loss": 9.6031, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.9079460269865067, |
|
"grad_norm": 0.29261884093284607, |
|
"learning_rate": 2.6291518472686404e-06, |
|
"loss": 9.6028, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 0.9091454272863568, |
|
"grad_norm": 0.2895471453666687, |
|
"learning_rate": 2.5610117625322118e-06, |
|
"loss": 9.6029, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.9103448275862069, |
|
"grad_norm": 0.2894986867904663, |
|
"learning_rate": 2.4937430958415278e-06, |
|
"loss": 9.6058, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 0.9115442278860569, |
|
"grad_norm": 0.2817150950431824, |
|
"learning_rate": 2.427347082870701e-06, |
|
"loss": 9.6065, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.912743628185907, |
|
"grad_norm": 0.2893367111682892, |
|
"learning_rate": 2.361824943263874e-06, |
|
"loss": 9.6136, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 0.9139430284857571, |
|
"grad_norm": 0.3113311529159546, |
|
"learning_rate": 2.2971778806127996e-06, |
|
"loss": 9.6116, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.9151424287856071, |
|
"grad_norm": 0.26471975445747375, |
|
"learning_rate": 2.233407082434724e-06, |
|
"loss": 9.608, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 0.9163418290854572, |
|
"grad_norm": 0.2689943015575409, |
|
"learning_rate": 2.1705137201505965e-06, |
|
"loss": 9.6016, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.9175412293853074, |
|
"grad_norm": 0.2714982032775879, |
|
"learning_rate": 2.1084989490635255e-06, |
|
"loss": 9.5975, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.9187406296851575, |
|
"grad_norm": 0.2796451151371002, |
|
"learning_rate": 2.0473639083375795e-06, |
|
"loss": 9.6013, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.9199400299850075, |
|
"grad_norm": 0.2697984278202057, |
|
"learning_rate": 1.9871097209768375e-06, |
|
"loss": 9.6081, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 0.9211394302848576, |
|
"grad_norm": 0.2762463092803955, |
|
"learning_rate": 1.9277374938047988e-06, |
|
"loss": 9.6034, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.9223388305847077, |
|
"grad_norm": 0.28663188219070435, |
|
"learning_rate": 1.8692483174439946e-06, |
|
"loss": 9.5996, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 0.9235382308845578, |
|
"grad_norm": 0.28348681330680847, |
|
"learning_rate": 1.8116432662960037e-06, |
|
"loss": 9.6014, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.9247376311844078, |
|
"grad_norm": 0.2859058976173401, |
|
"learning_rate": 1.7549233985217074e-06, |
|
"loss": 9.6014, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 0.9259370314842579, |
|
"grad_norm": 0.2842879295349121, |
|
"learning_rate": 1.6990897560218211e-06, |
|
"loss": 9.6047, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.927136431784108, |
|
"grad_norm": 0.289318323135376, |
|
"learning_rate": 1.644143364417794e-06, |
|
"loss": 9.6067, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 0.928335832083958, |
|
"grad_norm": 0.29014360904693604, |
|
"learning_rate": 1.5900852330329563e-06, |
|
"loss": 9.6226, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.9295352323838081, |
|
"grad_norm": 0.3719955384731293, |
|
"learning_rate": 1.5369163548739462e-06, |
|
"loss": 9.6146, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.9307346326836582, |
|
"grad_norm": 0.2645496428012848, |
|
"learning_rate": 1.484637706612535e-06, |
|
"loss": 9.6015, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.9319340329835083, |
|
"grad_norm": 0.27676111459732056, |
|
"learning_rate": 1.4332502485676358e-06, |
|
"loss": 9.6031, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 0.9331334332833583, |
|
"grad_norm": 0.26751890778541565, |
|
"learning_rate": 1.3827549246876625e-06, |
|
"loss": 9.6031, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.9343328335832084, |
|
"grad_norm": 0.26769477128982544, |
|
"learning_rate": 1.333152662533227e-06, |
|
"loss": 9.6071, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 0.9355322338830585, |
|
"grad_norm": 0.2727934420108795, |
|
"learning_rate": 1.2844443732600576e-06, |
|
"loss": 9.6092, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.9367316341829085, |
|
"grad_norm": 0.2800491452217102, |
|
"learning_rate": 1.2366309516022966e-06, |
|
"loss": 9.6069, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 0.9379310344827586, |
|
"grad_norm": 0.27642175555229187, |
|
"learning_rate": 1.189713275856047e-06, |
|
"loss": 9.6083, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.9391304347826087, |
|
"grad_norm": 0.2824404239654541, |
|
"learning_rate": 1.1436922078632394e-06, |
|
"loss": 9.6075, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 0.9403298350824588, |
|
"grad_norm": 0.28561386466026306, |
|
"learning_rate": 1.0985685929958134e-06, |
|
"loss": 9.607, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.9415292353823088, |
|
"grad_norm": 0.29175078868865967, |
|
"learning_rate": 1.0543432601401615e-06, |
|
"loss": 9.6059, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.9427286356821589, |
|
"grad_norm": 0.29381459951400757, |
|
"learning_rate": 1.0110170216819316e-06, |
|
"loss": 9.6138, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.943928035982009, |
|
"grad_norm": 0.30094021558761597, |
|
"learning_rate": 9.685906734910988e-07, |
|
"loss": 9.6111, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 0.945127436281859, |
|
"grad_norm": 0.27452078461647034, |
|
"learning_rate": 9.270649949073229e-07, |
|
"loss": 9.5987, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.9463268365817091, |
|
"grad_norm": 0.26897814869880676, |
|
"learning_rate": 8.864407487256699e-07, |
|
"loss": 9.6044, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 0.9475262368815592, |
|
"grad_norm": 0.27936410903930664, |
|
"learning_rate": 8.467186811825623e-07, |
|
"loss": 9.5946, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.9487256371814093, |
|
"grad_norm": 0.279367595911026, |
|
"learning_rate": 8.07899521942096e-07, |
|
"loss": 9.6049, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 0.9499250374812593, |
|
"grad_norm": 0.2792900502681732, |
|
"learning_rate": 7.69983984082634e-07, |
|
"loss": 9.6013, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.9511244377811094, |
|
"grad_norm": 0.27940914034843445, |
|
"learning_rate": 7.329727640837058e-07, |
|
"loss": 9.6057, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 0.9523238380809596, |
|
"grad_norm": 0.2867107391357422, |
|
"learning_rate": 6.968665418131848e-07, |
|
"loss": 9.6074, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.9535232383808095, |
|
"grad_norm": 0.28246691823005676, |
|
"learning_rate": 6.616659805148695e-07, |
|
"loss": 9.6092, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.9547226386806597, |
|
"grad_norm": 0.2778690755367279, |
|
"learning_rate": 6.273717267962164e-07, |
|
"loss": 9.612, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.9559220389805098, |
|
"grad_norm": 0.2856720983982086, |
|
"learning_rate": 5.93984410616527e-07, |
|
"loss": 9.6044, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 0.9571214392803599, |
|
"grad_norm": 0.30416610836982727, |
|
"learning_rate": 5.615046452753403e-07, |
|
"loss": 9.6137, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.9583208395802099, |
|
"grad_norm": 0.2967727780342102, |
|
"learning_rate": 5.299330274011916e-07, |
|
"loss": 9.6139, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 0.95952023988006, |
|
"grad_norm": 0.33488917350769043, |
|
"learning_rate": 4.992701369406161e-07, |
|
"loss": 9.6133, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.9607196401799101, |
|
"grad_norm": 0.2625320851802826, |
|
"learning_rate": 4.695165371475463e-07, |
|
"loss": 9.6024, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 0.9619190404797601, |
|
"grad_norm": 0.27328136563301086, |
|
"learning_rate": 4.4067277457292556e-07, |
|
"loss": 9.5989, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.9631184407796102, |
|
"grad_norm": 0.27370065450668335, |
|
"learning_rate": 4.1273937905467185e-07, |
|
"loss": 9.6009, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 0.9643178410794603, |
|
"grad_norm": 0.27158039808273315, |
|
"learning_rate": 3.8571686370797443e-07, |
|
"loss": 9.6003, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.9655172413793104, |
|
"grad_norm": 0.2797560691833496, |
|
"learning_rate": 3.5960572491583466e-07, |
|
"loss": 9.6009, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.9667166416791604, |
|
"grad_norm": 0.27934470772743225, |
|
"learning_rate": 3.3440644231995664e-07, |
|
"loss": 9.6051, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.9679160419790105, |
|
"grad_norm": 0.2808217406272888, |
|
"learning_rate": 3.101194788119599e-07, |
|
"loss": 9.606, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 0.9691154422788606, |
|
"grad_norm": 0.2805561423301697, |
|
"learning_rate": 2.867452805248416e-07, |
|
"loss": 9.6049, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.9703148425787106, |
|
"grad_norm": 0.2780965566635132, |
|
"learning_rate": 2.642842768248055e-07, |
|
"loss": 9.6102, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 0.9715142428785607, |
|
"grad_norm": 0.28727665543556213, |
|
"learning_rate": 2.4273688030336805e-07, |
|
"loss": 9.6085, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.9727136431784108, |
|
"grad_norm": 0.29147574305534363, |
|
"learning_rate": 2.2210348676977023e-07, |
|
"loss": 9.6056, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 0.9739130434782609, |
|
"grad_norm": 0.30791252851486206, |
|
"learning_rate": 2.0238447524372205e-07, |
|
"loss": 9.6104, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.9751124437781109, |
|
"grad_norm": 0.27840015292167664, |
|
"learning_rate": 1.8358020794843056e-07, |
|
"loss": 9.5996, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 0.976311844077961, |
|
"grad_norm": 0.26889273524284363, |
|
"learning_rate": 1.6569103030394938e-07, |
|
"loss": 9.6008, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.9775112443778111, |
|
"grad_norm": 0.2736169397830963, |
|
"learning_rate": 1.48717270920834e-07, |
|
"loss": 9.5996, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.9787106446776612, |
|
"grad_norm": 0.27559977769851685, |
|
"learning_rate": 1.3265924159410192e-07, |
|
"loss": 9.5988, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.9799100449775112, |
|
"grad_norm": 0.27969279885292053, |
|
"learning_rate": 1.1751723729750974e-07, |
|
"loss": 9.5987, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.9811094452773613, |
|
"grad_norm": 0.28211551904678345, |
|
"learning_rate": 1.0329153617812947e-07, |
|
"loss": 9.5975, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.9823088455772114, |
|
"grad_norm": 0.27674898505210876, |
|
"learning_rate": 8.998239955124721e-08, |
|
"loss": 9.6071, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 0.9835082458770614, |
|
"grad_norm": 0.27468326687812805, |
|
"learning_rate": 7.759007189555579e-08, |
|
"loss": 9.6077, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.9847076461769115, |
|
"grad_norm": 0.2778529226779938, |
|
"learning_rate": 6.611478084866951e-08, |
|
"loss": 9.6102, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 0.9859070464767616, |
|
"grad_norm": 0.27971234917640686, |
|
"learning_rate": 5.555673720292753e-08, |
|
"loss": 9.6097, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.9871064467766117, |
|
"grad_norm": 0.29513809084892273, |
|
"learning_rate": 4.5916134901552443e-08, |
|
"loss": 9.6067, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 0.9883058470764617, |
|
"grad_norm": 0.2978692352771759, |
|
"learning_rate": 3.7193151035047616e-08, |
|
"loss": 9.6096, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.9895052473763118, |
|
"grad_norm": 0.46041354537010193, |
|
"learning_rate": 2.93879458379831e-08, |
|
"loss": 9.615, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.990704647676162, |
|
"grad_norm": 0.270831823348999, |
|
"learning_rate": 2.2500662686025797e-08, |
|
"loss": 9.5977, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.991904047976012, |
|
"grad_norm": 0.2735693156719208, |
|
"learning_rate": 1.653142809331376e-08, |
|
"loss": 9.6035, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 0.993103448275862, |
|
"grad_norm": 0.27037888765335083, |
|
"learning_rate": 1.148035171014139e-08, |
|
"loss": 9.6053, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.9943028485757122, |
|
"grad_norm": 0.27673718333244324, |
|
"learning_rate": 7.347526320927723e-09, |
|
"loss": 9.6065, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 0.9955022488755623, |
|
"grad_norm": 0.29478275775909424, |
|
"learning_rate": 4.133027842517789e-09, |
|
"loss": 9.605, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.9967016491754123, |
|
"grad_norm": 0.2775980830192566, |
|
"learning_rate": 1.8369153228114944e-09, |
|
"loss": 9.6047, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 0.9979010494752624, |
|
"grad_norm": 0.27957436442375183, |
|
"learning_rate": 4.5923093963118335e-10, |
|
"loss": 9.6086, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.9991004497751125, |
|
"grad_norm": 0.2953091561794281, |
|
"learning_rate": 0.0, |
|
"loss": 9.6163, |
|
"step": 833 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 833, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 239, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 691668038713344.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|