|
{ |
|
"best_metric": 0.07686587423086166, |
|
"best_model_checkpoint": "/teamspace/studios/this_studio/save/LLama_End/checkpoint-3500", |
|
"epoch": 1.9991833401388321, |
|
"eval_steps": 500, |
|
"global_step": 3672, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0027221995372260785, |
|
"grad_norm": 0.3611927926540375, |
|
"learning_rate": 5.000000000000001e-07, |
|
"loss": 0.7652, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.005444399074452157, |
|
"grad_norm": 0.3633479177951813, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7798, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.008166598611678236, |
|
"grad_norm": 0.31540995836257935, |
|
"learning_rate": 1.5e-06, |
|
"loss": 0.7635, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.010888798148904314, |
|
"grad_norm": 0.3314383924007416, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 0.7475, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.013610997686130393, |
|
"grad_norm": 0.391197144985199, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.7827, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.016333197223356473, |
|
"grad_norm": 0.39311718940734863, |
|
"learning_rate": 3e-06, |
|
"loss": 0.7985, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01905539676058255, |
|
"grad_norm": 0.34564024209976196, |
|
"learning_rate": 3.5000000000000004e-06, |
|
"loss": 0.7608, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.021777596297808628, |
|
"grad_norm": 0.43378740549087524, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.7566, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02449979583503471, |
|
"grad_norm": 0.38521063327789307, |
|
"learning_rate": 4.5e-06, |
|
"loss": 0.75, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.027221995372260787, |
|
"grad_norm": 0.4021587371826172, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7492, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.029944194909486865, |
|
"grad_norm": 0.44062522053718567, |
|
"learning_rate": 5.500000000000001e-06, |
|
"loss": 0.7208, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.032666394446712946, |
|
"grad_norm": 0.37620142102241516, |
|
"learning_rate": 6e-06, |
|
"loss": 0.6911, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.03538859398393902, |
|
"grad_norm": 0.27264639735221863, |
|
"learning_rate": 6.5000000000000004e-06, |
|
"loss": 0.6624, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.0381107935211651, |
|
"grad_norm": 0.2566782832145691, |
|
"learning_rate": 7.000000000000001e-06, |
|
"loss": 0.6326, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04083299305839118, |
|
"grad_norm": 0.27515003085136414, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.6135, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.043555192595617256, |
|
"grad_norm": 0.2592063248157501, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 0.5585, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.046277392132843334, |
|
"grad_norm": 0.22615216672420502, |
|
"learning_rate": 8.500000000000002e-06, |
|
"loss": 0.5482, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.04899959167006942, |
|
"grad_norm": 0.21914538741111755, |
|
"learning_rate": 9e-06, |
|
"loss": 0.5488, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.051721791207295496, |
|
"grad_norm": 0.23039759695529938, |
|
"learning_rate": 9.5e-06, |
|
"loss": 0.4816, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.054443990744521574, |
|
"grad_norm": 0.21080121397972107, |
|
"learning_rate": 1e-05, |
|
"loss": 0.4686, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.05716619028174765, |
|
"grad_norm": 0.19633150100708008, |
|
"learning_rate": 1.05e-05, |
|
"loss": 0.4426, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.05988838981897373, |
|
"grad_norm": 0.20176054537296295, |
|
"learning_rate": 1.1000000000000001e-05, |
|
"loss": 0.4131, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.06261058935619981, |
|
"grad_norm": 0.1965443640947342, |
|
"learning_rate": 1.1500000000000002e-05, |
|
"loss": 0.394, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.06533278889342589, |
|
"grad_norm": 0.18852515518665314, |
|
"learning_rate": 1.2e-05, |
|
"loss": 0.3621, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.06805498843065197, |
|
"grad_norm": 0.1972607970237732, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.3436, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.07077718796787805, |
|
"grad_norm": 0.1883157640695572, |
|
"learning_rate": 1.3000000000000001e-05, |
|
"loss": 0.3235, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.07349938750510412, |
|
"grad_norm": 0.20938365161418915, |
|
"learning_rate": 1.3500000000000001e-05, |
|
"loss": 0.3093, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.0762215870423302, |
|
"grad_norm": 0.2136233150959015, |
|
"learning_rate": 1.4000000000000001e-05, |
|
"loss": 0.281, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.07894378657955628, |
|
"grad_norm": 0.23373591899871826, |
|
"learning_rate": 1.45e-05, |
|
"loss": 0.2645, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.08166598611678236, |
|
"grad_norm": 0.21917635202407837, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.2399, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.08438818565400844, |
|
"grad_norm": 0.2619529068470001, |
|
"learning_rate": 1.55e-05, |
|
"loss": 0.2381, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.08711038519123451, |
|
"grad_norm": 0.2083514928817749, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 0.2139, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.08983258472846059, |
|
"grad_norm": 0.22957220673561096, |
|
"learning_rate": 1.65e-05, |
|
"loss": 0.1956, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.09255478426568667, |
|
"grad_norm": 0.270378977060318, |
|
"learning_rate": 1.7000000000000003e-05, |
|
"loss": 0.1906, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.09527698380291276, |
|
"grad_norm": 0.24123062193393707, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.1811, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.09799918334013884, |
|
"grad_norm": 0.2726229429244995, |
|
"learning_rate": 1.8e-05, |
|
"loss": 0.1756, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.10072138287736492, |
|
"grad_norm": 0.24878880381584167, |
|
"learning_rate": 1.85e-05, |
|
"loss": 0.1745, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.10344358241459099, |
|
"grad_norm": 0.22229672968387604, |
|
"learning_rate": 1.9e-05, |
|
"loss": 0.1641, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.10616578195181707, |
|
"grad_norm": 0.25277239084243774, |
|
"learning_rate": 1.9500000000000003e-05, |
|
"loss": 0.1478, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.10888798148904315, |
|
"grad_norm": 0.23793649673461914, |
|
"learning_rate": 2e-05, |
|
"loss": 0.1621, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.11161018102626923, |
|
"grad_norm": 0.22567716240882874, |
|
"learning_rate": 2.05e-05, |
|
"loss": 0.1416, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.1143323805634953, |
|
"grad_norm": 0.24615788459777832, |
|
"learning_rate": 2.1e-05, |
|
"loss": 0.1418, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.11705458010072138, |
|
"grad_norm": 0.23229427635669708, |
|
"learning_rate": 2.15e-05, |
|
"loss": 0.137, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.11977677963794746, |
|
"grad_norm": 0.2679113447666168, |
|
"learning_rate": 2.2000000000000003e-05, |
|
"loss": 0.1186, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.12249897917517354, |
|
"grad_norm": 0.24723300337791443, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.1243, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.12522117871239963, |
|
"grad_norm": 0.24604055285453796, |
|
"learning_rate": 2.3000000000000003e-05, |
|
"loss": 0.1472, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.1279433782496257, |
|
"grad_norm": 0.2207736223936081, |
|
"learning_rate": 2.35e-05, |
|
"loss": 0.1258, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.13066557778685178, |
|
"grad_norm": 0.2685292065143585, |
|
"learning_rate": 2.4e-05, |
|
"loss": 0.1407, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.13338777732407786, |
|
"grad_norm": 0.21564778685569763, |
|
"learning_rate": 2.45e-05, |
|
"loss": 0.1355, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.13610997686130394, |
|
"grad_norm": 0.22922654449939728, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.1298, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.13883217639853002, |
|
"grad_norm": 0.21827565133571625, |
|
"learning_rate": 2.5500000000000003e-05, |
|
"loss": 0.1256, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.1415543759357561, |
|
"grad_norm": 0.2129678726196289, |
|
"learning_rate": 2.6000000000000002e-05, |
|
"loss": 0.1435, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.14427657547298217, |
|
"grad_norm": 0.196010559797287, |
|
"learning_rate": 2.6500000000000004e-05, |
|
"loss": 0.1353, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.14699877501020825, |
|
"grad_norm": 0.22913923859596252, |
|
"learning_rate": 2.7000000000000002e-05, |
|
"loss": 0.1187, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.14972097454743433, |
|
"grad_norm": 0.26281264424324036, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.1697, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.1524431740846604, |
|
"grad_norm": 0.28295812010765076, |
|
"learning_rate": 2.8000000000000003e-05, |
|
"loss": 0.1229, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.15516537362188648, |
|
"grad_norm": 0.23690921068191528, |
|
"learning_rate": 2.8499999999999998e-05, |
|
"loss": 0.1086, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.15788757315911256, |
|
"grad_norm": 0.22863948345184326, |
|
"learning_rate": 2.9e-05, |
|
"loss": 0.1064, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.16060977269633864, |
|
"grad_norm": 0.1989349126815796, |
|
"learning_rate": 2.95e-05, |
|
"loss": 0.1145, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.16333197223356472, |
|
"grad_norm": 0.2325228750705719, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1181, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.1660541717707908, |
|
"grad_norm": 0.2611011266708374, |
|
"learning_rate": 3.05e-05, |
|
"loss": 0.1093, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.16877637130801687, |
|
"grad_norm": 0.23333916068077087, |
|
"learning_rate": 3.1e-05, |
|
"loss": 0.1119, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.17149857084524295, |
|
"grad_norm": 0.22769390046596527, |
|
"learning_rate": 3.15e-05, |
|
"loss": 0.1079, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.17422077038246903, |
|
"grad_norm": 0.23360049724578857, |
|
"learning_rate": 3.2000000000000005e-05, |
|
"loss": 0.1035, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.1769429699196951, |
|
"grad_norm": 0.2405271828174591, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.1208, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.17966516945692118, |
|
"grad_norm": 0.2959926128387451, |
|
"learning_rate": 3.3e-05, |
|
"loss": 0.113, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.18238736899414726, |
|
"grad_norm": 0.2246614694595337, |
|
"learning_rate": 3.35e-05, |
|
"loss": 0.1082, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.18510956853137334, |
|
"grad_norm": 0.23407892882823944, |
|
"learning_rate": 3.4000000000000007e-05, |
|
"loss": 0.103, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.18783176806859944, |
|
"grad_norm": 0.21018989384174347, |
|
"learning_rate": 3.45e-05, |
|
"loss": 0.1084, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.19055396760582552, |
|
"grad_norm": 0.1964080035686493, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.1021, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.1932761671430516, |
|
"grad_norm": 0.2037973254919052, |
|
"learning_rate": 3.55e-05, |
|
"loss": 0.1088, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.19599836668027767, |
|
"grad_norm": 0.2156139761209488, |
|
"learning_rate": 3.6e-05, |
|
"loss": 0.1104, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.19872056621750375, |
|
"grad_norm": 0.21694843471050262, |
|
"learning_rate": 3.65e-05, |
|
"loss": 0.1073, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.20144276575472983, |
|
"grad_norm": 0.21144092082977295, |
|
"learning_rate": 3.7e-05, |
|
"loss": 0.1365, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.2041649652919559, |
|
"grad_norm": 0.24178194999694824, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.0984, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.20688716482918199, |
|
"grad_norm": 0.20173633098602295, |
|
"learning_rate": 3.8e-05, |
|
"loss": 0.1289, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.20960936436640806, |
|
"grad_norm": 0.2253100723028183, |
|
"learning_rate": 3.85e-05, |
|
"loss": 0.1055, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.21233156390363414, |
|
"grad_norm": 0.2524998188018799, |
|
"learning_rate": 3.9000000000000006e-05, |
|
"loss": 0.1203, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.21505376344086022, |
|
"grad_norm": 0.21095526218414307, |
|
"learning_rate": 3.9500000000000005e-05, |
|
"loss": 0.0965, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.2177759629780863, |
|
"grad_norm": 0.1919548511505127, |
|
"learning_rate": 4e-05, |
|
"loss": 0.0961, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.22049816251531237, |
|
"grad_norm": 0.2088468074798584, |
|
"learning_rate": 4.05e-05, |
|
"loss": 0.0945, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.22322036205253845, |
|
"grad_norm": 0.22967375814914703, |
|
"learning_rate": 4.1e-05, |
|
"loss": 0.0991, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.22594256158976453, |
|
"grad_norm": 0.19900205731391907, |
|
"learning_rate": 4.15e-05, |
|
"loss": 0.1069, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.2286647611269906, |
|
"grad_norm": 0.22003936767578125, |
|
"learning_rate": 4.2e-05, |
|
"loss": 0.1037, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.23138696066421668, |
|
"grad_norm": 0.21360959112644196, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.0952, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.23410916020144276, |
|
"grad_norm": 0.1812208890914917, |
|
"learning_rate": 4.3e-05, |
|
"loss": 0.0962, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.23683135973866884, |
|
"grad_norm": 0.19043520092964172, |
|
"learning_rate": 4.35e-05, |
|
"loss": 0.1075, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.23955355927589492, |
|
"grad_norm": 0.2013886421918869, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 0.104, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.242275758813121, |
|
"grad_norm": 0.1831788420677185, |
|
"learning_rate": 4.4500000000000004e-05, |
|
"loss": 0.1012, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.24499795835034707, |
|
"grad_norm": 0.1884724646806717, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.1115, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.24772015788757315, |
|
"grad_norm": 0.1994744837284088, |
|
"learning_rate": 4.55e-05, |
|
"loss": 0.1006, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.25044235742479926, |
|
"grad_norm": 0.1783529371023178, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 0.0902, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.25316455696202533, |
|
"grad_norm": 0.180820032954216, |
|
"learning_rate": 4.6500000000000005e-05, |
|
"loss": 0.0937, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.2558867564992514, |
|
"grad_norm": 0.19279232621192932, |
|
"learning_rate": 4.7e-05, |
|
"loss": 0.0944, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.2586089560364775, |
|
"grad_norm": 0.1725597232580185, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.104, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.26133115557370357, |
|
"grad_norm": 0.17777124047279358, |
|
"learning_rate": 4.8e-05, |
|
"loss": 0.0889, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.26405335511092964, |
|
"grad_norm": 0.18942312896251678, |
|
"learning_rate": 4.85e-05, |
|
"loss": 0.0964, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.2667755546481557, |
|
"grad_norm": 0.17807991802692413, |
|
"learning_rate": 4.9e-05, |
|
"loss": 0.1038, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.2694977541853818, |
|
"grad_norm": 0.21202729642391205, |
|
"learning_rate": 4.9500000000000004e-05, |
|
"loss": 0.0941, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.2722199537226079, |
|
"grad_norm": 0.18360304832458496, |
|
"learning_rate": 5e-05, |
|
"loss": 0.1013, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.2722199537226079, |
|
"eval_loss": 0.09772992134094238, |
|
"eval_runtime": 271.0226, |
|
"eval_samples_per_second": 2.214, |
|
"eval_steps_per_second": 0.553, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.27494215325983395, |
|
"grad_norm": 0.1730509251356125, |
|
"learning_rate": 4.999969346326857e-05, |
|
"loss": 0.098, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.27766435279706003, |
|
"grad_norm": 0.17720815539360046, |
|
"learning_rate": 4.9998773860591444e-05, |
|
"loss": 0.0912, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.2803865523342861, |
|
"grad_norm": 0.16912730038166046, |
|
"learning_rate": 4.9997241214519986e-05, |
|
"loss": 0.0885, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.2831087518715122, |
|
"grad_norm": 0.166640505194664, |
|
"learning_rate": 4.999509556263919e-05, |
|
"loss": 0.1013, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.28583095140873827, |
|
"grad_norm": 0.1893271952867508, |
|
"learning_rate": 4.999233695756673e-05, |
|
"loss": 0.1005, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.28855315094596434, |
|
"grad_norm": 0.1966632455587387, |
|
"learning_rate": 4.998896546695172e-05, |
|
"loss": 0.0958, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.2912753504831904, |
|
"grad_norm": 0.16383662819862366, |
|
"learning_rate": 4.998498117347302e-05, |
|
"loss": 0.0971, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.2939975500204165, |
|
"grad_norm": 0.21534255146980286, |
|
"learning_rate": 4.998038417483721e-05, |
|
"loss": 0.1057, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.2967197495576426, |
|
"grad_norm": 0.16162091493606567, |
|
"learning_rate": 4.9975174583776196e-05, |
|
"loss": 0.0943, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.29944194909486865, |
|
"grad_norm": 0.1837451457977295, |
|
"learning_rate": 4.996935252804448e-05, |
|
"loss": 0.0968, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.30216414863209473, |
|
"grad_norm": 0.15664127469062805, |
|
"learning_rate": 4.9962918150415946e-05, |
|
"loss": 0.0975, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.3048863481693208, |
|
"grad_norm": 0.16413035988807678, |
|
"learning_rate": 4.995587160868047e-05, |
|
"loss": 0.0925, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.3076085477065469, |
|
"grad_norm": 0.16700997948646545, |
|
"learning_rate": 4.994821307563995e-05, |
|
"loss": 0.097, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.31033074724377296, |
|
"grad_norm": 0.16121503710746765, |
|
"learning_rate": 4.9939942739104105e-05, |
|
"loss": 0.0996, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.31305294678099904, |
|
"grad_norm": 0.16212740540504456, |
|
"learning_rate": 4.993106080188592e-05, |
|
"loss": 0.0888, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.3157751463182251, |
|
"grad_norm": 0.1662980020046234, |
|
"learning_rate": 4.9921567481796585e-05, |
|
"loss": 0.0994, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.3184973458554512, |
|
"grad_norm": 0.1845940202474594, |
|
"learning_rate": 4.9911463011640195e-05, |
|
"loss": 0.0976, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.3212195453926773, |
|
"grad_norm": 0.17079226672649384, |
|
"learning_rate": 4.9900747639208044e-05, |
|
"loss": 0.0947, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.32394174492990335, |
|
"grad_norm": 0.1642676740884781, |
|
"learning_rate": 4.9889421627272575e-05, |
|
"loss": 0.1039, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.32666394446712943, |
|
"grad_norm": 0.20938318967819214, |
|
"learning_rate": 4.987748525358087e-05, |
|
"loss": 0.1005, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.3293861440043555, |
|
"grad_norm": 0.17017363011837006, |
|
"learning_rate": 4.9864938810847884e-05, |
|
"loss": 0.0941, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.3321083435415816, |
|
"grad_norm": 0.17874999344348907, |
|
"learning_rate": 4.985178260674927e-05, |
|
"loss": 0.1055, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.33483054307880766, |
|
"grad_norm": 0.16853144764900208, |
|
"learning_rate": 4.98380169639138e-05, |
|
"loss": 0.0941, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.33755274261603374, |
|
"grad_norm": 0.18321168422698975, |
|
"learning_rate": 4.98236422199155e-05, |
|
"loss": 0.0991, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.3402749421532598, |
|
"grad_norm": 0.16565477848052979, |
|
"learning_rate": 4.980865872726532e-05, |
|
"loss": 0.0979, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.3429971416904859, |
|
"grad_norm": 0.19292525947093964, |
|
"learning_rate": 4.9793066853402536e-05, |
|
"loss": 0.0957, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.345719341227712, |
|
"grad_norm": 0.1700926125049591, |
|
"learning_rate": 4.977686698068572e-05, |
|
"loss": 0.0889, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.34844154076493805, |
|
"grad_norm": 0.15266568958759308, |
|
"learning_rate": 4.976005950638334e-05, |
|
"loss": 0.0928, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.35116374030216413, |
|
"grad_norm": 0.15712082386016846, |
|
"learning_rate": 4.974264484266406e-05, |
|
"loss": 0.0927, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.3538859398393902, |
|
"grad_norm": 0.15186965465545654, |
|
"learning_rate": 4.972462341658661e-05, |
|
"loss": 0.0944, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.3566081393766163, |
|
"grad_norm": 0.18069899082183838, |
|
"learning_rate": 4.970599567008931e-05, |
|
"loss": 0.0998, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.35933033891384236, |
|
"grad_norm": 0.14533384144306183, |
|
"learning_rate": 4.968676205997925e-05, |
|
"loss": 0.0981, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.36205253845106844, |
|
"grad_norm": 0.16364873945713043, |
|
"learning_rate": 4.966692305792106e-05, |
|
"loss": 0.0932, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.3647747379882945, |
|
"grad_norm": 0.18472006916999817, |
|
"learning_rate": 4.9646479150425376e-05, |
|
"loss": 0.0866, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.3674969375255206, |
|
"grad_norm": 0.15847539901733398, |
|
"learning_rate": 4.962543083883687e-05, |
|
"loss": 0.0837, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.3702191370627467, |
|
"grad_norm": 0.155991330742836, |
|
"learning_rate": 4.9603778639322004e-05, |
|
"loss": 0.0906, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.37294133659997275, |
|
"grad_norm": 0.16835445165634155, |
|
"learning_rate": 4.958152308285633e-05, |
|
"loss": 0.0949, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.3756635361371989, |
|
"grad_norm": 0.18460558354854584, |
|
"learning_rate": 4.95586647152115e-05, |
|
"loss": 0.0931, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.37838573567442496, |
|
"grad_norm": 0.17255567014217377, |
|
"learning_rate": 4.9535204096941854e-05, |
|
"loss": 0.087, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.38110793521165104, |
|
"grad_norm": 0.15889868140220642, |
|
"learning_rate": 4.951114180337069e-05, |
|
"loss": 0.0965, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.3838301347488771, |
|
"grad_norm": 0.1403263658285141, |
|
"learning_rate": 4.948647842457615e-05, |
|
"loss": 0.0862, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.3865523342861032, |
|
"grad_norm": 0.16774417459964752, |
|
"learning_rate": 4.946121456537676e-05, |
|
"loss": 0.0911, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.38927453382332927, |
|
"grad_norm": 0.16498145461082458, |
|
"learning_rate": 4.9435350845316575e-05, |
|
"loss": 0.0851, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.39199673336055535, |
|
"grad_norm": 0.15889093279838562, |
|
"learning_rate": 4.9408887898650036e-05, |
|
"loss": 0.0836, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.3947189328977814, |
|
"grad_norm": 0.14469870924949646, |
|
"learning_rate": 4.9381826374326336e-05, |
|
"loss": 0.1033, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.3974411324350075, |
|
"grad_norm": 0.1625497043132782, |
|
"learning_rate": 4.935416693597358e-05, |
|
"loss": 0.0862, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.4001633319722336, |
|
"grad_norm": 0.15590594708919525, |
|
"learning_rate": 4.932591026188247e-05, |
|
"loss": 0.0946, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.40288553150945966, |
|
"grad_norm": 0.1706922948360443, |
|
"learning_rate": 4.929705704498969e-05, |
|
"loss": 0.0913, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.40560773104668574, |
|
"grad_norm": 0.15704989433288574, |
|
"learning_rate": 4.9267607992860906e-05, |
|
"loss": 0.0888, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.4083299305839118, |
|
"grad_norm": 0.14272324740886688, |
|
"learning_rate": 4.9237563827673416e-05, |
|
"loss": 0.0845, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.4110521301211379, |
|
"grad_norm": 0.1592000275850296, |
|
"learning_rate": 4.9206925286198426e-05, |
|
"loss": 0.09, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.41377432965836397, |
|
"grad_norm": 0.16091205179691315, |
|
"learning_rate": 4.9175693119783013e-05, |
|
"loss": 0.0855, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.41649652919559005, |
|
"grad_norm": 0.13851270079612732, |
|
"learning_rate": 4.914386809433167e-05, |
|
"loss": 0.0946, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.4192187287328161, |
|
"grad_norm": 0.15238268673419952, |
|
"learning_rate": 4.911145099028753e-05, |
|
"loss": 0.0861, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.4219409282700422, |
|
"grad_norm": 0.15689148008823395, |
|
"learning_rate": 4.9078442602613265e-05, |
|
"loss": 0.088, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.4246631278072683, |
|
"grad_norm": 0.17214879393577576, |
|
"learning_rate": 4.9044843740771505e-05, |
|
"loss": 0.0882, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.42738532734449436, |
|
"grad_norm": 0.15415912866592407, |
|
"learning_rate": 4.901065522870511e-05, |
|
"loss": 0.0978, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.43010752688172044, |
|
"grad_norm": 0.165154829621315, |
|
"learning_rate": 4.897587790481683e-05, |
|
"loss": 0.0833, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.4328297264189465, |
|
"grad_norm": 0.15241703391075134, |
|
"learning_rate": 4.894051262194885e-05, |
|
"loss": 0.0944, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.4355519259561726, |
|
"grad_norm": 0.13514836132526398, |
|
"learning_rate": 4.8904560247361833e-05, |
|
"loss": 0.1036, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.43827412549339867, |
|
"grad_norm": 0.14687085151672363, |
|
"learning_rate": 4.886802166271364e-05, |
|
"loss": 0.1042, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.44099632503062475, |
|
"grad_norm": 0.143589586019516, |
|
"learning_rate": 4.8830897764037744e-05, |
|
"loss": 0.0771, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.4437185245678508, |
|
"grad_norm": 0.16582971811294556, |
|
"learning_rate": 4.879318946172123e-05, |
|
"loss": 0.0992, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.4464407241050769, |
|
"grad_norm": 0.1646450310945511, |
|
"learning_rate": 4.875489768048247e-05, |
|
"loss": 0.0959, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.449162923642303, |
|
"grad_norm": 0.12446028739213943, |
|
"learning_rate": 4.871602335934847e-05, |
|
"loss": 0.0817, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.45188512317952906, |
|
"grad_norm": 0.15374915301799774, |
|
"learning_rate": 4.867656745163182e-05, |
|
"loss": 0.0763, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.45460732271675514, |
|
"grad_norm": 0.1652667224407196, |
|
"learning_rate": 4.8636530924907296e-05, |
|
"loss": 0.1026, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.4573295222539812, |
|
"grad_norm": 0.13503149151802063, |
|
"learning_rate": 4.85959147609882e-05, |
|
"loss": 0.0986, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.4600517217912073, |
|
"grad_norm": 0.16977562010288239, |
|
"learning_rate": 4.855471995590222e-05, |
|
"loss": 0.1099, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.46277392132843337, |
|
"grad_norm": 0.14991120994091034, |
|
"learning_rate": 4.851294751986702e-05, |
|
"loss": 0.0839, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.46549612086565945, |
|
"grad_norm": 0.15737441182136536, |
|
"learning_rate": 4.84705984772655e-05, |
|
"loss": 0.097, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.4682183204028855, |
|
"grad_norm": 0.14336347579956055, |
|
"learning_rate": 4.8427673866620615e-05, |
|
"loss": 0.0837, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.4709405199401116, |
|
"grad_norm": 0.12957650423049927, |
|
"learning_rate": 4.8384174740569944e-05, |
|
"loss": 0.0871, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.4736627194773377, |
|
"grad_norm": 0.15204955637454987, |
|
"learning_rate": 4.83401021658399e-05, |
|
"loss": 0.0905, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.47638491901456376, |
|
"grad_norm": 0.1311759650707245, |
|
"learning_rate": 4.82954572232195e-05, |
|
"loss": 0.1187, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.47910711855178983, |
|
"grad_norm": 0.14466483891010284, |
|
"learning_rate": 4.825024100753395e-05, |
|
"loss": 0.0892, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.4818293180890159, |
|
"grad_norm": 0.14845815300941467, |
|
"learning_rate": 4.820445462761771e-05, |
|
"loss": 0.0865, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.484551517626242, |
|
"grad_norm": 0.15656766295433044, |
|
"learning_rate": 4.815809920628738e-05, |
|
"loss": 0.0899, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.48727371716346807, |
|
"grad_norm": 0.11957818269729614, |
|
"learning_rate": 4.8111175880314084e-05, |
|
"loss": 0.0767, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.48999591670069415, |
|
"grad_norm": 0.14076441526412964, |
|
"learning_rate": 4.806368580039566e-05, |
|
"loss": 0.0832, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.4927181162379202, |
|
"grad_norm": 0.15563461184501648, |
|
"learning_rate": 4.801563013112844e-05, |
|
"loss": 0.0873, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.4954403157751463, |
|
"grad_norm": 0.13865019381046295, |
|
"learning_rate": 4.7967010050978635e-05, |
|
"loss": 0.1083, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.4981625153123724, |
|
"grad_norm": 0.1487104445695877, |
|
"learning_rate": 4.791782675225348e-05, |
|
"loss": 0.0853, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.5008847148495985, |
|
"grad_norm": 0.14385932683944702, |
|
"learning_rate": 4.7868081441071975e-05, |
|
"loss": 0.0882, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.5036069143868246, |
|
"grad_norm": 0.144570991396904, |
|
"learning_rate": 4.781777533733534e-05, |
|
"loss": 0.0859, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.5063291139240507, |
|
"grad_norm": 0.1305367350578308, |
|
"learning_rate": 4.776690967469708e-05, |
|
"loss": 0.0788, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.5090513134612767, |
|
"grad_norm": 0.14406158030033112, |
|
"learning_rate": 4.771548570053268e-05, |
|
"loss": 0.075, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.5117735129985028, |
|
"grad_norm": 0.14757996797561646, |
|
"learning_rate": 4.766350467590911e-05, |
|
"loss": 0.0991, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.5144957125357289, |
|
"grad_norm": 0.13629554212093353, |
|
"learning_rate": 4.7610967875553846e-05, |
|
"loss": 0.082, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.517217912072955, |
|
"grad_norm": 0.1259639412164688, |
|
"learning_rate": 4.755787658782361e-05, |
|
"loss": 0.0848, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.519940111610181, |
|
"grad_norm": 0.14720089733600616, |
|
"learning_rate": 4.750423211467278e-05, |
|
"loss": 0.083, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.5226623111474071, |
|
"grad_norm": 0.13509656488895416, |
|
"learning_rate": 4.745003577162148e-05, |
|
"loss": 0.0856, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.5253845106846332, |
|
"grad_norm": 0.15491896867752075, |
|
"learning_rate": 4.7395288887723296e-05, |
|
"loss": 0.0953, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.5281067102218593, |
|
"grad_norm": 0.14023731648921967, |
|
"learning_rate": 4.73399928055327e-05, |
|
"loss": 0.0887, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.5308289097590854, |
|
"grad_norm": 0.13751854002475739, |
|
"learning_rate": 4.728414888107211e-05, |
|
"loss": 0.0908, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.5335511092963114, |
|
"grad_norm": 0.16226938366889954, |
|
"learning_rate": 4.722775848379866e-05, |
|
"loss": 0.0898, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.5362733088335375, |
|
"grad_norm": 0.12918660044670105, |
|
"learning_rate": 4.717082299657058e-05, |
|
"loss": 0.0985, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.5389955083707636, |
|
"grad_norm": 0.14950741827487946, |
|
"learning_rate": 4.711334381561333e-05, |
|
"loss": 0.0894, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.5417177079079897, |
|
"grad_norm": 0.158179372549057, |
|
"learning_rate": 4.7055322350485344e-05, |
|
"loss": 0.0823, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.5444399074452158, |
|
"grad_norm": 0.14156126976013184, |
|
"learning_rate": 4.699676002404342e-05, |
|
"loss": 0.0851, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.5444399074452158, |
|
"eval_loss": 0.08648888021707535, |
|
"eval_runtime": 254.9955, |
|
"eval_samples_per_second": 2.353, |
|
"eval_steps_per_second": 0.588, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.5471621069824418, |
|
"grad_norm": 0.14035391807556152, |
|
"learning_rate": 4.6937658272407905e-05, |
|
"loss": 0.0827, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.5498843065196679, |
|
"grad_norm": 0.1346469521522522, |
|
"learning_rate": 4.6878018544927415e-05, |
|
"loss": 0.0986, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.552606506056894, |
|
"grad_norm": 0.16363918781280518, |
|
"learning_rate": 4.681784230414332e-05, |
|
"loss": 0.0883, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.5553287055941201, |
|
"grad_norm": 0.14541316032409668, |
|
"learning_rate": 4.6757131025753886e-05, |
|
"loss": 0.0881, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.5580509051313461, |
|
"grad_norm": 0.1540316641330719, |
|
"learning_rate": 4.6695886198578034e-05, |
|
"loss": 0.0825, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.5607731046685722, |
|
"grad_norm": 0.15897440910339355, |
|
"learning_rate": 4.6634109324518914e-05, |
|
"loss": 0.0978, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.5634953042057983, |
|
"grad_norm": 0.13335077464580536, |
|
"learning_rate": 4.657180191852701e-05, |
|
"loss": 0.1565, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.5662175037430244, |
|
"grad_norm": 0.1566026508808136, |
|
"learning_rate": 4.6508965508562995e-05, |
|
"loss": 0.0927, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.5689397032802505, |
|
"grad_norm": 0.1556750237941742, |
|
"learning_rate": 4.644560163556031e-05, |
|
"loss": 0.0839, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.5716619028174765, |
|
"grad_norm": 0.15351781249046326, |
|
"learning_rate": 4.638171185338729e-05, |
|
"loss": 0.0812, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.5743841023547026, |
|
"grad_norm": 0.16494281589984894, |
|
"learning_rate": 4.6317297728809147e-05, |
|
"loss": 0.0889, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.5771063018919287, |
|
"grad_norm": 0.15591537952423096, |
|
"learning_rate": 4.62523608414495e-05, |
|
"loss": 0.0858, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.5798285014291548, |
|
"grad_norm": 0.1396636664867401, |
|
"learning_rate": 4.618690278375164e-05, |
|
"loss": 0.0856, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.5825507009663808, |
|
"grad_norm": 0.1384498029947281, |
|
"learning_rate": 4.61209251609395e-05, |
|
"loss": 0.082, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.5852729005036069, |
|
"grad_norm": 0.13901008665561676, |
|
"learning_rate": 4.605442959097826e-05, |
|
"loss": 0.0821, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.587995100040833, |
|
"grad_norm": 0.1609126627445221, |
|
"learning_rate": 4.5987417704534697e-05, |
|
"loss": 0.1273, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.5907172995780591, |
|
"grad_norm": 0.1475275307893753, |
|
"learning_rate": 4.591989114493718e-05, |
|
"loss": 0.0807, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.5934394991152852, |
|
"grad_norm": 0.14419402182102203, |
|
"learning_rate": 4.5851851568135376e-05, |
|
"loss": 0.0737, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.5961616986525112, |
|
"grad_norm": 0.13583903014659882, |
|
"learning_rate": 4.5783300642659644e-05, |
|
"loss": 0.0904, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.5988838981897373, |
|
"grad_norm": 0.1405334770679474, |
|
"learning_rate": 4.571424004958012e-05, |
|
"loss": 0.0863, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.6016060977269634, |
|
"grad_norm": 0.1326403170824051, |
|
"learning_rate": 4.564467148246548e-05, |
|
"loss": 0.0839, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.6043282972641895, |
|
"grad_norm": 0.14316369593143463, |
|
"learning_rate": 4.557459664734141e-05, |
|
"loss": 0.0862, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.6070504968014155, |
|
"grad_norm": 0.14497336745262146, |
|
"learning_rate": 4.550401726264879e-05, |
|
"loss": 0.0733, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.6097726963386416, |
|
"grad_norm": 0.15112735331058502, |
|
"learning_rate": 4.5432935059201544e-05, |
|
"loss": 0.0712, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.6124948958758677, |
|
"grad_norm": 0.1457735002040863, |
|
"learning_rate": 4.536135178014415e-05, |
|
"loss": 0.0808, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.6152170954130938, |
|
"grad_norm": 0.14395588636398315, |
|
"learning_rate": 4.528926918090898e-05, |
|
"loss": 0.0863, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.6179392949503199, |
|
"grad_norm": 0.13603851199150085, |
|
"learning_rate": 4.521668902917317e-05, |
|
"loss": 0.0848, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.6206614944875459, |
|
"grad_norm": 0.16027645766735077, |
|
"learning_rate": 4.514361310481533e-05, |
|
"loss": 0.0929, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.623383694024772, |
|
"grad_norm": 0.13826999068260193, |
|
"learning_rate": 4.507004319987185e-05, |
|
"loss": 0.0914, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.6261058935619981, |
|
"grad_norm": 0.14140823483467102, |
|
"learning_rate": 4.499598111849299e-05, |
|
"loss": 0.0885, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.6288280930992242, |
|
"grad_norm": 0.13926826417446136, |
|
"learning_rate": 4.492142867689861e-05, |
|
"loss": 0.083, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.6315502926364502, |
|
"grad_norm": 0.148858904838562, |
|
"learning_rate": 4.484638770333367e-05, |
|
"loss": 0.0866, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.6342724921736763, |
|
"grad_norm": 0.12866345047950745, |
|
"learning_rate": 4.4770860038023335e-05, |
|
"loss": 0.0991, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.6369946917109024, |
|
"grad_norm": 0.13434378802776337, |
|
"learning_rate": 4.4694847533127903e-05, |
|
"loss": 0.079, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.6397168912481285, |
|
"grad_norm": 0.13786840438842773, |
|
"learning_rate": 4.461835205269736e-05, |
|
"loss": 0.0842, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.6424390907853545, |
|
"grad_norm": 0.12938237190246582, |
|
"learning_rate": 4.454137547262566e-05, |
|
"loss": 0.0771, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.6451612903225806, |
|
"grad_norm": 0.13730603456497192, |
|
"learning_rate": 4.446391968060475e-05, |
|
"loss": 0.0831, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.6478834898598067, |
|
"grad_norm": 0.142304465174675, |
|
"learning_rate": 4.4385986576078254e-05, |
|
"loss": 0.0918, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.6506056893970328, |
|
"grad_norm": 0.1353188157081604, |
|
"learning_rate": 4.43075780701949e-05, |
|
"loss": 0.0897, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.6533278889342589, |
|
"grad_norm": 0.1252220869064331, |
|
"learning_rate": 4.422869608576167e-05, |
|
"loss": 0.1353, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.6560500884714849, |
|
"grad_norm": 0.14703655242919922, |
|
"learning_rate": 4.4149342557196605e-05, |
|
"loss": 0.2083, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.658772288008711, |
|
"grad_norm": 0.13858124613761902, |
|
"learning_rate": 4.406951943048141e-05, |
|
"loss": 0.0824, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.6614944875459371, |
|
"grad_norm": 0.1428638994693756, |
|
"learning_rate": 4.3989228663113714e-05, |
|
"loss": 0.0774, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.6642166870831632, |
|
"grad_norm": 0.13073915243148804, |
|
"learning_rate": 4.3908472224059064e-05, |
|
"loss": 0.0788, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.6669388866203892, |
|
"grad_norm": 0.14101268351078033, |
|
"learning_rate": 4.3827252093702656e-05, |
|
"loss": 0.0882, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.6696610861576153, |
|
"grad_norm": 0.13420827686786652, |
|
"learning_rate": 4.374557026380075e-05, |
|
"loss": 0.0783, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.6723832856948414, |
|
"grad_norm": 0.12414020299911499, |
|
"learning_rate": 4.366342873743185e-05, |
|
"loss": 0.0874, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.6751054852320675, |
|
"grad_norm": 0.12837223708629608, |
|
"learning_rate": 4.358082952894753e-05, |
|
"loss": 0.0863, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.6778276847692936, |
|
"grad_norm": 0.14144425094127655, |
|
"learning_rate": 4.349777466392313e-05, |
|
"loss": 0.0784, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.6805498843065196, |
|
"grad_norm": 0.13347384333610535, |
|
"learning_rate": 4.341426617910798e-05, |
|
"loss": 0.0823, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.6832720838437457, |
|
"grad_norm": 0.1294572502374649, |
|
"learning_rate": 4.3330306122375516e-05, |
|
"loss": 0.0787, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.6859942833809718, |
|
"grad_norm": 0.13768264651298523, |
|
"learning_rate": 4.324589655267306e-05, |
|
"loss": 0.0812, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.6887164829181979, |
|
"grad_norm": 0.14431719481945038, |
|
"learning_rate": 4.3161039539971295e-05, |
|
"loss": 0.0917, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.691438682455424, |
|
"grad_norm": 0.13428707420825958, |
|
"learning_rate": 4.307573716521353e-05, |
|
"loss": 0.0866, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.69416088199265, |
|
"grad_norm": 0.14333197474479675, |
|
"learning_rate": 4.298999152026465e-05, |
|
"loss": 0.0822, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.6968830815298761, |
|
"grad_norm": 0.13545657694339752, |
|
"learning_rate": 4.2903804707859835e-05, |
|
"loss": 0.1482, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.6996052810671022, |
|
"grad_norm": 0.13254009187221527, |
|
"learning_rate": 4.281717884155298e-05, |
|
"loss": 0.0792, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 0.7023274806043283, |
|
"grad_norm": 0.12849317491054535, |
|
"learning_rate": 4.2730116045664905e-05, |
|
"loss": 0.0909, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.7050496801415543, |
|
"grad_norm": 0.15678727626800537, |
|
"learning_rate": 4.264261845523116e-05, |
|
"loss": 0.0902, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.7077718796787804, |
|
"grad_norm": 0.1492096483707428, |
|
"learning_rate": 4.255468821594981e-05, |
|
"loss": 0.0796, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.7104940792160065, |
|
"grad_norm": 0.12939919531345367, |
|
"learning_rate": 4.2466327484128685e-05, |
|
"loss": 0.0929, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 0.7132162787532326, |
|
"grad_norm": 0.13585573434829712, |
|
"learning_rate": 4.2377538426632595e-05, |
|
"loss": 0.0997, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.7159384782904586, |
|
"grad_norm": 0.15193824470043182, |
|
"learning_rate": 4.228832322083013e-05, |
|
"loss": 0.0806, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 0.7186606778276847, |
|
"grad_norm": 0.1575016975402832, |
|
"learning_rate": 4.2198684054540285e-05, |
|
"loss": 0.0877, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.7213828773649108, |
|
"grad_norm": 0.12813854217529297, |
|
"learning_rate": 4.210862312597884e-05, |
|
"loss": 0.0802, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.7241050769021369, |
|
"grad_norm": 0.13729339838027954, |
|
"learning_rate": 4.201814264370441e-05, |
|
"loss": 0.0816, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.726827276439363, |
|
"grad_norm": 0.12651780247688293, |
|
"learning_rate": 4.192724482656428e-05, |
|
"loss": 0.0812, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 0.729549475976589, |
|
"grad_norm": 0.13558164238929749, |
|
"learning_rate": 4.1835931903640046e-05, |
|
"loss": 0.0926, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.7322716755138151, |
|
"grad_norm": 0.1335664838552475, |
|
"learning_rate": 4.17442061141929e-05, |
|
"loss": 0.078, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 0.7349938750510412, |
|
"grad_norm": 0.12852634489536285, |
|
"learning_rate": 4.165206970760874e-05, |
|
"loss": 0.0805, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.7377160745882673, |
|
"grad_norm": 0.14529520273208618, |
|
"learning_rate": 4.1559524943342985e-05, |
|
"loss": 0.0837, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 0.7404382741254933, |
|
"grad_norm": 0.14452920854091644, |
|
"learning_rate": 4.1466574090865225e-05, |
|
"loss": 0.0857, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.7431604736627194, |
|
"grad_norm": 0.13917720317840576, |
|
"learning_rate": 4.1373219429603473e-05, |
|
"loss": 0.0814, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.7458826731999455, |
|
"grad_norm": 0.11816851049661636, |
|
"learning_rate": 4.127946324888836e-05, |
|
"loss": 0.0743, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.7486048727371717, |
|
"grad_norm": 0.14422592520713806, |
|
"learning_rate": 4.118530784789694e-05, |
|
"loss": 0.0768, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.7513270722743978, |
|
"grad_norm": 0.14289607107639313, |
|
"learning_rate": 4.109075553559633e-05, |
|
"loss": 0.0806, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.7540492718116238, |
|
"grad_norm": 0.14432035386562347, |
|
"learning_rate": 4.099580863068706e-05, |
|
"loss": 0.0915, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 0.7567714713488499, |
|
"grad_norm": 0.16059063374996185, |
|
"learning_rate": 4.0900469461546235e-05, |
|
"loss": 0.0799, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.759493670886076, |
|
"grad_norm": 0.14347511529922485, |
|
"learning_rate": 4.0804740366170454e-05, |
|
"loss": 0.0899, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 0.7622158704233021, |
|
"grad_norm": 0.12717047333717346, |
|
"learning_rate": 4.070862369211843e-05, |
|
"loss": 0.0827, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.7649380699605282, |
|
"grad_norm": 0.12884029746055603, |
|
"learning_rate": 4.061212179645345e-05, |
|
"loss": 0.0856, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 0.7676602694977542, |
|
"grad_norm": 0.14441439509391785, |
|
"learning_rate": 4.051523704568557e-05, |
|
"loss": 0.1416, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.7703824690349803, |
|
"grad_norm": 0.12841658294200897, |
|
"learning_rate": 4.0417971815713584e-05, |
|
"loss": 0.0827, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 0.7731046685722064, |
|
"grad_norm": 0.14411351084709167, |
|
"learning_rate": 4.032032849176673e-05, |
|
"loss": 0.0808, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.7758268681094325, |
|
"grad_norm": 0.13570953905582428, |
|
"learning_rate": 4.022230946834624e-05, |
|
"loss": 0.08, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.7785490676466585, |
|
"grad_norm": 0.13861487805843353, |
|
"learning_rate": 4.012391714916661e-05, |
|
"loss": 0.0867, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.7812712671838846, |
|
"grad_norm": 0.14143118262290955, |
|
"learning_rate": 4.0025153947096624e-05, |
|
"loss": 0.0865, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 0.7839934667211107, |
|
"grad_norm": 0.1405879408121109, |
|
"learning_rate": 3.992602228410023e-05, |
|
"loss": 0.0983, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.7867156662583368, |
|
"grad_norm": 0.14435367286205292, |
|
"learning_rate": 3.982652459117707e-05, |
|
"loss": 0.0829, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 0.7894378657955629, |
|
"grad_norm": 0.14033889770507812, |
|
"learning_rate": 3.972666330830299e-05, |
|
"loss": 0.0903, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.7921600653327889, |
|
"grad_norm": 0.13285259902477264, |
|
"learning_rate": 3.9626440884370065e-05, |
|
"loss": 0.0748, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 0.794882264870015, |
|
"grad_norm": 0.1237478256225586, |
|
"learning_rate": 3.952585977712664e-05, |
|
"loss": 0.083, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.7976044644072411, |
|
"grad_norm": 0.14227712154388428, |
|
"learning_rate": 3.942492245311703e-05, |
|
"loss": 0.0926, |
|
"step": 1465 |
|
}, |
|
{ |
|
"epoch": 0.8003266639444672, |
|
"grad_norm": 0.13259918987751007, |
|
"learning_rate": 3.9323631387621015e-05, |
|
"loss": 0.0841, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.8030488634816932, |
|
"grad_norm": 0.12796998023986816, |
|
"learning_rate": 3.9221989064593175e-05, |
|
"loss": 0.076, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.8057710630189193, |
|
"grad_norm": 0.13424307107925415, |
|
"learning_rate": 3.9119997976601954e-05, |
|
"loss": 0.097, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.8084932625561454, |
|
"grad_norm": 0.12660135328769684, |
|
"learning_rate": 3.9017660624768515e-05, |
|
"loss": 0.0834, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 0.8112154620933715, |
|
"grad_norm": 0.12413927167654037, |
|
"learning_rate": 3.891497951870545e-05, |
|
"loss": 0.0765, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.8139376616305976, |
|
"grad_norm": 0.1326495110988617, |
|
"learning_rate": 3.8811957176455214e-05, |
|
"loss": 0.0832, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 0.8166598611678236, |
|
"grad_norm": 0.133337140083313, |
|
"learning_rate": 3.870859612442837e-05, |
|
"loss": 0.0819, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.8166598611678236, |
|
"eval_loss": 0.08255165070295334, |
|
"eval_runtime": 238.131, |
|
"eval_samples_per_second": 2.52, |
|
"eval_steps_per_second": 0.63, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.8193820607050497, |
|
"grad_norm": 0.14871715009212494, |
|
"learning_rate": 3.8604898897341644e-05, |
|
"loss": 0.1878, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 0.8221042602422758, |
|
"grad_norm": 0.1360073983669281, |
|
"learning_rate": 3.850086803815576e-05, |
|
"loss": 0.0879, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.8248264597795019, |
|
"grad_norm": 0.12314160168170929, |
|
"learning_rate": 3.8396506098013076e-05, |
|
"loss": 0.0767, |
|
"step": 1515 |
|
}, |
|
{ |
|
"epoch": 0.8275486593167279, |
|
"grad_norm": 0.14723584055900574, |
|
"learning_rate": 3.829181563617503e-05, |
|
"loss": 0.0972, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.830270858853954, |
|
"grad_norm": 0.14206384122371674, |
|
"learning_rate": 3.81867992199594e-05, |
|
"loss": 0.0839, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.8329930583911801, |
|
"grad_norm": 0.11804749816656113, |
|
"learning_rate": 3.808145942467729e-05, |
|
"loss": 0.0923, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.8357152579284062, |
|
"grad_norm": 0.12313738465309143, |
|
"learning_rate": 3.797579883357002e-05, |
|
"loss": 0.0872, |
|
"step": 1535 |
|
}, |
|
{ |
|
"epoch": 0.8384374574656323, |
|
"grad_norm": 0.13978321850299835, |
|
"learning_rate": 3.7869820037745776e-05, |
|
"loss": 0.1243, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.8411596570028583, |
|
"grad_norm": 0.12310109287500381, |
|
"learning_rate": 3.776352563611604e-05, |
|
"loss": 0.0839, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 0.8438818565400844, |
|
"grad_norm": 0.13480614125728607, |
|
"learning_rate": 3.765691823533191e-05, |
|
"loss": 0.0873, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.8466040560773105, |
|
"grad_norm": 0.13653361797332764, |
|
"learning_rate": 3.75500004497201e-05, |
|
"loss": 0.0771, |
|
"step": 1555 |
|
}, |
|
{ |
|
"epoch": 0.8493262556145366, |
|
"grad_norm": 0.1283886879682541, |
|
"learning_rate": 3.74427749012189e-05, |
|
"loss": 0.0816, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.8520484551517626, |
|
"grad_norm": 0.12764351069927216, |
|
"learning_rate": 3.733524421931385e-05, |
|
"loss": 0.0859, |
|
"step": 1565 |
|
}, |
|
{ |
|
"epoch": 0.8547706546889887, |
|
"grad_norm": 0.1281007081270218, |
|
"learning_rate": 3.722741104097323e-05, |
|
"loss": 0.0806, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.8574928542262148, |
|
"grad_norm": 0.1311092972755432, |
|
"learning_rate": 3.711927801058347e-05, |
|
"loss": 0.0792, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.8602150537634409, |
|
"grad_norm": 0.11376714706420898, |
|
"learning_rate": 3.7010847779884204e-05, |
|
"loss": 0.0763, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.862937253300667, |
|
"grad_norm": 0.14167912304401398, |
|
"learning_rate": 3.690212300790333e-05, |
|
"loss": 0.0871, |
|
"step": 1585 |
|
}, |
|
{ |
|
"epoch": 0.865659452837893, |
|
"grad_norm": 0.13373591005802155, |
|
"learning_rate": 3.679310636089174e-05, |
|
"loss": 0.1189, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.8683816523751191, |
|
"grad_norm": 0.14792831242084503, |
|
"learning_rate": 3.668380051225794e-05, |
|
"loss": 0.0811, |
|
"step": 1595 |
|
}, |
|
{ |
|
"epoch": 0.8711038519123452, |
|
"grad_norm": 0.14200051128864288, |
|
"learning_rate": 3.657420814250258e-05, |
|
"loss": 0.086, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.8738260514495713, |
|
"grad_norm": 0.1501314491033554, |
|
"learning_rate": 3.646433193915257e-05, |
|
"loss": 0.0885, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 0.8765482509867973, |
|
"grad_norm": 0.10102449357509613, |
|
"learning_rate": 3.635417459669532e-05, |
|
"loss": 0.0738, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.8792704505240234, |
|
"grad_norm": 0.12374872714281082, |
|
"learning_rate": 3.624373881651254e-05, |
|
"loss": 0.0883, |
|
"step": 1615 |
|
}, |
|
{ |
|
"epoch": 0.8819926500612495, |
|
"grad_norm": 0.1358879953622818, |
|
"learning_rate": 3.6133027306814085e-05, |
|
"loss": 0.0869, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.8847148495984756, |
|
"grad_norm": 0.12947793304920197, |
|
"learning_rate": 3.6022042782571494e-05, |
|
"loss": 0.0868, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.8874370491357016, |
|
"grad_norm": 0.14391177892684937, |
|
"learning_rate": 3.591078796545144e-05, |
|
"loss": 0.0829, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.8901592486729277, |
|
"grad_norm": 0.13038809597492218, |
|
"learning_rate": 3.579926558374897e-05, |
|
"loss": 0.0888, |
|
"step": 1635 |
|
}, |
|
{ |
|
"epoch": 0.8928814482101538, |
|
"grad_norm": 0.15558017790317535, |
|
"learning_rate": 3.5687478372320576e-05, |
|
"loss": 0.0853, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.8956036477473799, |
|
"grad_norm": 0.1262005716562271, |
|
"learning_rate": 3.557542907251718e-05, |
|
"loss": 0.0763, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 0.898325847284606, |
|
"grad_norm": 0.1222100704908371, |
|
"learning_rate": 3.546312043211687e-05, |
|
"loss": 0.0835, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.901048046821832, |
|
"grad_norm": 0.14368890225887299, |
|
"learning_rate": 3.535055520525753e-05, |
|
"loss": 0.0795, |
|
"step": 1655 |
|
}, |
|
{ |
|
"epoch": 0.9037702463590581, |
|
"grad_norm": 0.13927970826625824, |
|
"learning_rate": 3.52377361523693e-05, |
|
"loss": 0.0838, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.9064924458962842, |
|
"grad_norm": 0.14974816143512726, |
|
"learning_rate": 3.512466604010688e-05, |
|
"loss": 0.0834, |
|
"step": 1665 |
|
}, |
|
{ |
|
"epoch": 0.9092146454335103, |
|
"grad_norm": 0.14425551891326904, |
|
"learning_rate": 3.501134764128167e-05, |
|
"loss": 0.0933, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.9119368449707363, |
|
"grad_norm": 0.13006910681724548, |
|
"learning_rate": 3.4897783734793794e-05, |
|
"loss": 0.0829, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.9146590445079624, |
|
"grad_norm": 0.10913355648517609, |
|
"learning_rate": 3.478397710556397e-05, |
|
"loss": 0.0775, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.9173812440451885, |
|
"grad_norm": 0.13194523751735687, |
|
"learning_rate": 3.466993054446515e-05, |
|
"loss": 0.0762, |
|
"step": 1685 |
|
}, |
|
{ |
|
"epoch": 0.9201034435824146, |
|
"grad_norm": 0.1317387819290161, |
|
"learning_rate": 3.4555646848254136e-05, |
|
"loss": 0.094, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.9228256431196407, |
|
"grad_norm": 0.1169929951429367, |
|
"learning_rate": 3.4441128819503e-05, |
|
"loss": 0.0839, |
|
"step": 1695 |
|
}, |
|
{ |
|
"epoch": 0.9255478426568667, |
|
"grad_norm": 0.13555140793323517, |
|
"learning_rate": 3.4326379266530314e-05, |
|
"loss": 0.0831, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.9282700421940928, |
|
"grad_norm": 0.1173190027475357, |
|
"learning_rate": 3.421140100333231e-05, |
|
"loss": 0.0842, |
|
"step": 1705 |
|
}, |
|
{ |
|
"epoch": 0.9309922417313189, |
|
"grad_norm": 0.1393524408340454, |
|
"learning_rate": 3.409619684951386e-05, |
|
"loss": 0.1127, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.933714441268545, |
|
"grad_norm": 0.12765948474407196, |
|
"learning_rate": 3.3980769630219354e-05, |
|
"loss": 0.0781, |
|
"step": 1715 |
|
}, |
|
{ |
|
"epoch": 0.936436640805771, |
|
"grad_norm": 0.11775423586368561, |
|
"learning_rate": 3.386512217606339e-05, |
|
"loss": 0.071, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.9391588403429971, |
|
"grad_norm": 0.1447712928056717, |
|
"learning_rate": 3.3749257323061376e-05, |
|
"loss": 0.0852, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.9418810398802232, |
|
"grad_norm": 0.14077867567539215, |
|
"learning_rate": 3.3633177912559984e-05, |
|
"loss": 0.0793, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.9446032394174493, |
|
"grad_norm": 0.13743150234222412, |
|
"learning_rate": 3.3516886791167444e-05, |
|
"loss": 0.0804, |
|
"step": 1735 |
|
}, |
|
{ |
|
"epoch": 0.9473254389546754, |
|
"grad_norm": 0.13769537210464478, |
|
"learning_rate": 3.34003868106838e-05, |
|
"loss": 0.0865, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.9500476384919014, |
|
"grad_norm": 0.1354909986257553, |
|
"learning_rate": 3.328368082803088e-05, |
|
"loss": 0.0793, |
|
"step": 1745 |
|
}, |
|
{ |
|
"epoch": 0.9527698380291275, |
|
"grad_norm": 0.12832538783550262, |
|
"learning_rate": 3.316677170518235e-05, |
|
"loss": 0.0769, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.9554920375663536, |
|
"grad_norm": 0.1272445023059845, |
|
"learning_rate": 3.304966230909342e-05, |
|
"loss": 0.0931, |
|
"step": 1755 |
|
}, |
|
{ |
|
"epoch": 0.9582142371035797, |
|
"grad_norm": 0.14885401725769043, |
|
"learning_rate": 3.293235551163063e-05, |
|
"loss": 0.0835, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.9609364366408057, |
|
"grad_norm": 0.13052543997764587, |
|
"learning_rate": 3.2814854189501346e-05, |
|
"loss": 0.0866, |
|
"step": 1765 |
|
}, |
|
{ |
|
"epoch": 0.9636586361780318, |
|
"grad_norm": 0.12114793807268143, |
|
"learning_rate": 3.269716122418326e-05, |
|
"loss": 0.0851, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.9663808357152579, |
|
"grad_norm": 0.13619346916675568, |
|
"learning_rate": 3.2579279501853744e-05, |
|
"loss": 0.0834, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.969103035252484, |
|
"grad_norm": 0.13156457245349884, |
|
"learning_rate": 3.246121191331902e-05, |
|
"loss": 0.0868, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.9718252347897101, |
|
"grad_norm": 0.1379031538963318, |
|
"learning_rate": 3.234296135394329e-05, |
|
"loss": 0.081, |
|
"step": 1785 |
|
}, |
|
{ |
|
"epoch": 0.9745474343269361, |
|
"grad_norm": 0.1381215900182724, |
|
"learning_rate": 3.222453072357777e-05, |
|
"loss": 0.0758, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.9772696338641622, |
|
"grad_norm": 0.11675938963890076, |
|
"learning_rate": 3.2105922926489504e-05, |
|
"loss": 0.1002, |
|
"step": 1795 |
|
}, |
|
{ |
|
"epoch": 0.9799918334013883, |
|
"grad_norm": 0.12285912781953812, |
|
"learning_rate": 3.1987140871290236e-05, |
|
"loss": 0.0736, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.9827140329386144, |
|
"grad_norm": 0.13735820353031158, |
|
"learning_rate": 3.1868187470864984e-05, |
|
"loss": 0.0779, |
|
"step": 1805 |
|
}, |
|
{ |
|
"epoch": 0.9854362324758404, |
|
"grad_norm": 0.13905146718025208, |
|
"learning_rate": 3.1749065642300674e-05, |
|
"loss": 0.0833, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.9881584320130665, |
|
"grad_norm": 0.13085448741912842, |
|
"learning_rate": 3.1629778306814585e-05, |
|
"loss": 0.0859, |
|
"step": 1815 |
|
}, |
|
{ |
|
"epoch": 0.9908806315502926, |
|
"grad_norm": 0.13941799104213715, |
|
"learning_rate": 3.151032838968271e-05, |
|
"loss": 0.0845, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.9936028310875187, |
|
"grad_norm": 0.12749053537845612, |
|
"learning_rate": 3.139071882016802e-05, |
|
"loss": 0.0935, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.9963250306247448, |
|
"grad_norm": 0.12027429789304733, |
|
"learning_rate": 3.127095253144864e-05, |
|
"loss": 0.077, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.9990472301619708, |
|
"grad_norm": 0.13250844180583954, |
|
"learning_rate": 3.11510324605459e-05, |
|
"loss": 0.0795, |
|
"step": 1835 |
|
}, |
|
{ |
|
"epoch": 1.001769429699197, |
|
"grad_norm": 0.12925724685192108, |
|
"learning_rate": 3.103096154825233e-05, |
|
"loss": 0.1021, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.004491629236423, |
|
"grad_norm": 0.1351374089717865, |
|
"learning_rate": 3.0910742739059527e-05, |
|
"loss": 0.0833, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 1.0072138287736492, |
|
"grad_norm": 0.13750001788139343, |
|
"learning_rate": 3.0790378981085956e-05, |
|
"loss": 0.076, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.0099360283108751, |
|
"grad_norm": 0.1203581839799881, |
|
"learning_rate": 3.0669873226004655e-05, |
|
"loss": 0.0769, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 1.0126582278481013, |
|
"grad_norm": 0.12651003897190094, |
|
"learning_rate": 3.054922842897084e-05, |
|
"loss": 0.0729, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.0153804273853273, |
|
"grad_norm": 0.14429447054862976, |
|
"learning_rate": 3.0428447548549467e-05, |
|
"loss": 0.0772, |
|
"step": 1865 |
|
}, |
|
{ |
|
"epoch": 1.0181026269225535, |
|
"grad_norm": 0.13154245913028717, |
|
"learning_rate": 3.030753354664262e-05, |
|
"loss": 0.1681, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.0208248264597795, |
|
"grad_norm": 0.11439166218042374, |
|
"learning_rate": 3.018648938841695e-05, |
|
"loss": 0.0753, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 1.0235470259970056, |
|
"grad_norm": 0.12332076579332352, |
|
"learning_rate": 3.00653180422309e-05, |
|
"loss": 0.0746, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.0262692255342316, |
|
"grad_norm": 0.12956227362155914, |
|
"learning_rate": 2.994402247956194e-05, |
|
"loss": 0.0733, |
|
"step": 1885 |
|
}, |
|
{ |
|
"epoch": 1.0289914250714578, |
|
"grad_norm": 0.13542629778385162, |
|
"learning_rate": 2.9822605674933696e-05, |
|
"loss": 0.076, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.0317136246086838, |
|
"grad_norm": 0.14228489995002747, |
|
"learning_rate": 2.9701070605843e-05, |
|
"loss": 0.0756, |
|
"step": 1895 |
|
}, |
|
{ |
|
"epoch": 1.03443582414591, |
|
"grad_norm": 0.13206136226654053, |
|
"learning_rate": 2.957942025268689e-05, |
|
"loss": 0.0733, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.037158023683136, |
|
"grad_norm": 0.14194877445697784, |
|
"learning_rate": 2.945765759868949e-05, |
|
"loss": 0.0837, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 1.039880223220362, |
|
"grad_norm": 0.1430080085992813, |
|
"learning_rate": 2.933578562982888e-05, |
|
"loss": 0.0814, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.042602422757588, |
|
"grad_norm": 0.12564821541309357, |
|
"learning_rate": 2.9213807334763854e-05, |
|
"loss": 0.0748, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 1.0453246222948143, |
|
"grad_norm": 0.12261082231998444, |
|
"learning_rate": 2.9091725704760638e-05, |
|
"loss": 0.0668, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.0480468218320402, |
|
"grad_norm": 0.14175981283187866, |
|
"learning_rate": 2.8969543733619554e-05, |
|
"loss": 0.0742, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 1.0507690213692664, |
|
"grad_norm": 0.1397016942501068, |
|
"learning_rate": 2.884726441760155e-05, |
|
"loss": 0.0716, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 1.0534912209064924, |
|
"grad_norm": 0.1348811835050583, |
|
"learning_rate": 2.8724890755354783e-05, |
|
"loss": 0.0882, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 1.0562134204437186, |
|
"grad_norm": 0.11297186464071274, |
|
"learning_rate": 2.8602425747841057e-05, |
|
"loss": 0.0715, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 1.0589356199809445, |
|
"grad_norm": 0.1416822224855423, |
|
"learning_rate": 2.8479872398262198e-05, |
|
"loss": 0.0805, |
|
"step": 1945 |
|
}, |
|
{ |
|
"epoch": 1.0616578195181707, |
|
"grad_norm": 0.147059828042984, |
|
"learning_rate": 2.8357233711986487e-05, |
|
"loss": 0.0764, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.0643800190553967, |
|
"grad_norm": 0.137193962931633, |
|
"learning_rate": 2.8234512696474875e-05, |
|
"loss": 0.0733, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 1.0671022185926229, |
|
"grad_norm": 0.12843886017799377, |
|
"learning_rate": 2.8111712361207283e-05, |
|
"loss": 0.0751, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.0698244181298489, |
|
"grad_norm": 0.1351711004972458, |
|
"learning_rate": 2.7988835717608784e-05, |
|
"loss": 0.0776, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 1.072546617667075, |
|
"grad_norm": 0.1511557698249817, |
|
"learning_rate": 2.7865885778975743e-05, |
|
"loss": 0.0764, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 1.075268817204301, |
|
"grad_norm": 0.12794414162635803, |
|
"learning_rate": 2.774286556040196e-05, |
|
"loss": 0.0792, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 1.0779910167415272, |
|
"grad_norm": 0.13065066933631897, |
|
"learning_rate": 2.7619778078704685e-05, |
|
"loss": 0.0706, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.0807132162787532, |
|
"grad_norm": 0.13252925872802734, |
|
"learning_rate": 2.7496626352350664e-05, |
|
"loss": 0.0826, |
|
"step": 1985 |
|
}, |
|
{ |
|
"epoch": 1.0834354158159794, |
|
"grad_norm": 0.1248321682214737, |
|
"learning_rate": 2.7373413401382104e-05, |
|
"loss": 0.076, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 1.0861576153532053, |
|
"grad_norm": 0.14091134071350098, |
|
"learning_rate": 2.7250142247342637e-05, |
|
"loss": 0.1298, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 1.0888798148904315, |
|
"grad_norm": 0.13955925405025482, |
|
"learning_rate": 2.7126815913203178e-05, |
|
"loss": 0.0753, |
|
"step": 2000 |
|
}, |
|
{
"epoch": 1.0888798148904315,
"eval_loss": 0.08035612851381302,
"eval_runtime": 228.5599,
"eval_samples_per_second": 2.625,
"eval_steps_per_second": 0.656,
"step": 2000
},
{
"epoch": 1.0916020144276575,
"grad_norm": 0.1300753802061081,
"learning_rate": 2.7003437423287857e-05,
"loss": 0.1036,
"step": 2005
},
{
"epoch": 1.0943242139648837,
"grad_norm": 0.12375082820653915,
"learning_rate": 2.688000980319979e-05,
"loss": 0.0744,
"step": 2010
},
{
"epoch": 1.0970464135021096,
"grad_norm": 0.14749085903167725,
"learning_rate": 2.6756536079746907e-05,
"loss": 0.0738,
"step": 2015
},
{
"epoch": 1.0997686130393358,
"grad_norm": 0.13898104429244995,
"learning_rate": 2.663301928086774e-05,
"loss": 0.0701,
"step": 2020
},
{
"epoch": 1.1024908125765618,
"grad_norm": 0.12244424223899841,
"learning_rate": 2.6509462435557152e-05,
"loss": 0.0707,
"step": 2025
},
{
"epoch": 1.105213012113788,
"grad_norm": 0.13717930018901825,
"learning_rate": 2.6385868573792072e-05,
"loss": 0.07,
"step": 2030
},
{
"epoch": 1.107935211651014,
"grad_norm": 0.16028448939323425,
"learning_rate": 2.6262240726457167e-05,
"loss": 0.0815,
"step": 2035
},
{
"epoch": 1.1106574111882401,
"grad_norm": 0.13453496992588043,
"learning_rate": 2.6138581925270533e-05,
"loss": 0.0802,
"step": 2040
},
{
"epoch": 1.113379610725466,
"grad_norm": 0.14346693456172943,
"learning_rate": 2.6014895202709354e-05,
"loss": 0.0777,
"step": 2045
},
{
"epoch": 1.1161018102626923,
"grad_norm": 0.12791860103607178,
"learning_rate": 2.5891183591935515e-05,
"loss": 0.0798,
"step": 2050
},
{
"epoch": 1.1188240097999183,
"grad_norm": 0.1344967633485794,
"learning_rate": 2.5767450126721255e-05,
"loss": 0.0679,
"step": 2055
},
{
"epoch": 1.1215462093371444,
"grad_norm": 0.1335950344800949,
"learning_rate": 2.564369784137472e-05,
"loss": 0.0742,
"step": 2060
},
{
"epoch": 1.1242684088743704,
"grad_norm": 0.12762115895748138,
"learning_rate": 2.5519929770665597e-05,
"loss": 0.0885,
"step": 2065
},
{
"epoch": 1.1269906084115966,
"grad_norm": 0.1343313604593277,
"learning_rate": 2.539614894975067e-05,
"loss": 0.0672,
"step": 2070
},
{
"epoch": 1.1297128079488226,
"grad_norm": 0.13929401338100433,
"learning_rate": 2.5272358414099413e-05,
"loss": 0.0765,
"step": 2075
},
{
"epoch": 1.1324350074860488,
"grad_norm": 0.1336519867181778,
"learning_rate": 2.5148561199419516e-05,
"loss": 0.0728,
"step": 2080
},
{
"epoch": 1.1351572070232747,
"grad_norm": 0.13433049619197845,
"learning_rate": 2.5024760341582453e-05,
"loss": 0.0785,
"step": 2085
},
{
"epoch": 1.137879406560501,
"grad_norm": 0.12692618370056152,
"learning_rate": 2.490095887654906e-05,
"loss": 0.0971,
"step": 2090
},
{
"epoch": 1.1406016060977269,
"grad_norm": 0.1449315845966339,
"learning_rate": 2.4777159840295046e-05,
"loss": 0.0669,
"step": 2095
},
{
"epoch": 1.143323805634953,
"grad_norm": 0.14388057589530945,
"learning_rate": 2.4653366268736565e-05,
"loss": 0.0776,
"step": 2100
},
{
"epoch": 1.146046005172179,
"grad_norm": 0.12486547976732254,
"learning_rate": 2.452958119765577e-05,
"loss": 0.0782,
"step": 2105
},
{
"epoch": 1.1487682047094052,
"grad_norm": 0.135404571890831,
"learning_rate": 2.4405807662626338e-05,
"loss": 0.0792,
"step": 2110
},
{
"epoch": 1.1514904042466312,
"grad_norm": 0.14270424842834473,
"learning_rate": 2.4282048698939062e-05,
"loss": 0.0729,
"step": 2115
},
{
"epoch": 1.1542126037838574,
"grad_norm": 0.13348889350891113,
"learning_rate": 2.41583073415274e-05,
"loss": 0.0842,
"step": 2120
},
{
"epoch": 1.1569348033210836,
"grad_norm": 0.15408015251159668,
"learning_rate": 2.403458662489304e-05,
"loss": 0.0903,
"step": 2125
},
{
"epoch": 1.1596570028583095,
"grad_norm": 0.12234120815992355,
"learning_rate": 2.3910889583031533e-05,
"loss": 0.0793,
"step": 2130
},
{
"epoch": 1.1623792023955355,
"grad_norm": 0.15008710324764252,
"learning_rate": 2.3787219249357803e-05,
"loss": 0.0759,
"step": 2135
},
{
"epoch": 1.1651014019327617,
"grad_norm": 0.133488729596138,
"learning_rate": 2.3663578656631858e-05,
"loss": 0.0783,
"step": 2140
},
{
"epoch": 1.1678236014699879,
"grad_norm": 0.12147729843854904,
"learning_rate": 2.3539970836884347e-05,
"loss": 0.0816,
"step": 2145
},
{
"epoch": 1.1705458010072138,
"grad_norm": 0.14398206770420074,
"learning_rate": 2.3416398821342238e-05,
"loss": 0.0739,
"step": 2150
},
{
"epoch": 1.1732680005444398,
"grad_norm": 0.14384213089942932,
"learning_rate": 2.329286564035446e-05,
"loss": 0.0764,
"step": 2155
},
{
"epoch": 1.175990200081666,
"grad_norm": 0.1491377055644989,
"learning_rate": 2.3169374323317624e-05,
"loss": 0.0716,
"step": 2160
},
{
"epoch": 1.1787123996188922,
"grad_norm": 0.13119584321975708,
"learning_rate": 2.3045927898601702e-05,
"loss": 0.0737,
"step": 2165
},
{
"epoch": 1.1814345991561181,
"grad_norm": 0.13395731151103973,
"learning_rate": 2.292252939347577e-05,
"loss": 0.0674,
"step": 2170
},
{
"epoch": 1.1841567986933441,
"grad_norm": 0.1500498354434967,
"learning_rate": 2.27991818340338e-05,
"loss": 0.0872,
"step": 2175
},
{
"epoch": 1.1868789982305703,
"grad_norm": 0.15766474604606628,
"learning_rate": 2.2675888245120382e-05,
"loss": 0.1069,
"step": 2180
},
{
"epoch": 1.1896011977677965,
"grad_norm": 0.1266118288040161,
"learning_rate": 2.255265165025663e-05,
"loss": 0.0753,
"step": 2185
},
{
"epoch": 1.1923233973050225,
"grad_norm": 0.12611445784568787,
"learning_rate": 2.2429475071565987e-05,
"loss": 0.0704,
"step": 2190
},
{
"epoch": 1.1950455968422484,
"grad_norm": 0.1382066011428833,
"learning_rate": 2.2306361529700125e-05,
"loss": 0.081,
"step": 2195
},
{
"epoch": 1.1977677963794746,
"grad_norm": 0.13451717793941498,
"learning_rate": 2.218331404376484e-05,
"loss": 0.0813,
"step": 2200
},
{
"epoch": 1.2004899959167008,
"grad_norm": 0.1681748777627945,
"learning_rate": 2.2060335631246075e-05,
"loss": 0.0793,
"step": 2205
},
{
"epoch": 1.2032121954539268,
"grad_norm": 0.14408931136131287,
"learning_rate": 2.1937429307935887e-05,
"loss": 0.0763,
"step": 2210
},
{
"epoch": 1.2059343949911527,
"grad_norm": 0.14158952236175537,
"learning_rate": 2.1814598087858476e-05,
"loss": 0.0882,
"step": 2215
},
{
"epoch": 1.208656594528379,
"grad_norm": 0.12508870661258698,
"learning_rate": 2.16918449831963e-05,
"loss": 0.0762,
"step": 2220
},
{
"epoch": 1.2113787940656051,
"grad_norm": 0.13746081292629242,
"learning_rate": 2.15691730042162e-05,
"loss": 0.0802,
"step": 2225
},
{
"epoch": 1.214100993602831,
"grad_norm": 0.12565705180168152,
"learning_rate": 2.144658515919557e-05,
"loss": 0.071,
"step": 2230
},
{
"epoch": 1.216823193140057,
"grad_norm": 0.13257178664207458,
"learning_rate": 2.1324084454348592e-05,
"loss": 0.069,
"step": 2235
},
{
"epoch": 1.2195453926772832,
"grad_norm": 0.15064120292663574,
"learning_rate": 2.1201673893752534e-05,
"loss": 0.0793,
"step": 2240
},
{
"epoch": 1.2222675922145094,
"grad_norm": 0.1452379673719406,
"learning_rate": 2.107935647927404e-05,
"loss": 0.0653,
"step": 2245
},
{
"epoch": 1.2249897917517354,
"grad_norm": 0.16073830425739288,
"learning_rate": 2.095713521049554e-05,
"loss": 0.0726,
"step": 2250
},
{
"epoch": 1.2277119912889614,
"grad_norm": 0.15698719024658203,
"learning_rate": 2.0835013084641704e-05,
"loss": 0.0781,
"step": 2255
},
{
"epoch": 1.2304341908261875,
"grad_norm": 0.13535834848880768,
"learning_rate": 2.07129930965059e-05,
"loss": 0.0754,
"step": 2260
},
{
"epoch": 1.2331563903634137,
"grad_norm": 0.13440349698066711,
"learning_rate": 2.0591078238376803e-05,
"loss": 0.1275,
"step": 2265
},
{
"epoch": 1.2358785899006397,
"grad_norm": 0.15323837101459503,
"learning_rate": 2.0469271499964993e-05,
"loss": 0.0738,
"step": 2270
},
{
"epoch": 1.238600789437866,
"grad_norm": 0.14659421145915985,
"learning_rate": 2.034757586832961e-05,
"loss": 0.0719,
"step": 2275
},
{
"epoch": 1.2413229889750919,
"grad_norm": 0.14592771232128143,
"learning_rate": 2.022599432780515e-05,
"loss": 0.0793,
"step": 2280
},
{
"epoch": 1.244045188512318,
"grad_norm": 0.12547121942043304,
"learning_rate": 2.0104529859928254e-05,
"loss": 0.0754,
"step": 2285
},
{
"epoch": 1.246767388049544,
"grad_norm": 0.13453277945518494,
"learning_rate": 1.9983185443364615e-05,
"loss": 0.0679,
"step": 2290
},
{
"epoch": 1.2494895875867702,
"grad_norm": 0.13268929719924927,
"learning_rate": 1.9861964053835885e-05,
"loss": 0.0778,
"step": 2295
},
{
"epoch": 1.2522117871239962,
"grad_norm": 0.14037257432937622,
"learning_rate": 1.974086866404675e-05,
"loss": 0.0874,
"step": 2300
},
{
"epoch": 1.2549339866612224,
"grad_norm": 0.1446313112974167,
"learning_rate": 1.961990224361201e-05,
"loss": 0.0708,
"step": 2305
},
{
"epoch": 1.2576561861984483,
"grad_norm": 0.15174619853496552,
"learning_rate": 1.949906775898375e-05,
"loss": 0.0841,
"step": 2310
},
{
"epoch": 1.2603783857356743,
"grad_norm": 0.14297954738140106,
"learning_rate": 1.9378368173378618e-05,
"loss": 0.0802,
"step": 2315
},
{
"epoch": 1.2631005852729005,
"grad_norm": 0.1344052255153656,
"learning_rate": 1.9257806446705116e-05,
"loss": 0.0675,
"step": 2320
},
{
"epoch": 1.2658227848101267,
"grad_norm": 0.14959311485290527,
"learning_rate": 1.913738553549106e-05,
"loss": 0.0848,
"step": 2325
},
{
"epoch": 1.2685449843473526,
"grad_norm": 0.12404653429985046,
"learning_rate": 1.9017108392811065e-05,
"loss": 0.0687,
"step": 2330
},
{
"epoch": 1.2712671838845788,
"grad_norm": 0.13011127710342407,
"learning_rate": 1.8896977968214078e-05,
"loss": 0.113,
"step": 2335
},
{
"epoch": 1.2739893834218048,
"grad_norm": 0.14289557933807373,
"learning_rate": 1.877699720765114e-05,
"loss": 0.0771,
"step": 2340
},
{
"epoch": 1.276711582959031,
"grad_norm": 0.14011026918888092,
"learning_rate": 1.8657169053403052e-05,
"loss": 0.0771,
"step": 2345
},
{
"epoch": 1.279433782496257,
"grad_norm": 0.1479315608739853,
"learning_rate": 1.8537496444008283e-05,
"loss": 0.07,
"step": 2350
},
{
"epoch": 1.2821559820334831,
"grad_norm": 0.1444329172372818,
"learning_rate": 1.841798231419087e-05,
"loss": 0.0815,
"step": 2355
},
{
"epoch": 1.284878181570709,
"grad_norm": 0.11986621469259262,
"learning_rate": 1.8298629594788467e-05,
"loss": 0.0689,
"step": 2360
},
{
"epoch": 1.2876003811079353,
"grad_norm": 0.13101747632026672,
"learning_rate": 1.817944121268048e-05,
"loss": 0.0728,
"step": 2365
},
{
"epoch": 1.2903225806451613,
"grad_norm": 0.1301647424697876,
"learning_rate": 1.8060420090716265e-05,
"loss": 0.0789,
"step": 2370
},
{
"epoch": 1.2930447801823874,
"grad_norm": 0.14230471849441528,
"learning_rate": 1.794156914764349e-05,
"loss": 0.0702,
"step": 2375
},
{
"epoch": 1.2957669797196134,
"grad_norm": 0.15737561881542206,
"learning_rate": 1.7822891298036515e-05,
"loss": 0.0781,
"step": 2380
},
{
"epoch": 1.2984891792568396,
"grad_norm": 0.16721278429031372,
"learning_rate": 1.7704389452224944e-05,
"loss": 0.0805,
"step": 2385
},
{
"epoch": 1.3012113787940656,
"grad_norm": 0.1321476846933365,
"learning_rate": 1.7586066516222276e-05,
"loss": 0.0768,
"step": 2390
},
{
"epoch": 1.3039335783312918,
"grad_norm": 0.15134398639202118,
"learning_rate": 1.7467925391654585e-05,
"loss": 0.0825,
"step": 2395
},
{
"epoch": 1.3066557778685177,
"grad_norm": 0.14100222289562225,
"learning_rate": 1.73499689756894e-05,
"loss": 0.0823,
"step": 2400
},
{
"epoch": 1.309377977405744,
"grad_norm": 0.1558840423822403,
"learning_rate": 1.7232200160964657e-05,
"loss": 0.0832,
"step": 2405
},
{
"epoch": 1.3121001769429699,
"grad_norm": 0.14366985857486725,
"learning_rate": 1.7114621835517773e-05,
"loss": 0.0791,
"step": 2410
},
{
"epoch": 1.314822376480196,
"grad_norm": 0.12937745451927185,
"learning_rate": 1.699723688271477e-05,
"loss": 0.0703,
"step": 2415
},
{
"epoch": 1.317544576017422,
"grad_norm": 0.12883788347244263,
"learning_rate": 1.6880048181179652e-05,
"loss": 0.0797,
"step": 2420
},
{
"epoch": 1.3202667755546482,
"grad_norm": 0.13123267889022827,
"learning_rate": 1.6763058604723723e-05,
"loss": 0.0727,
"step": 2425
},
{
"epoch": 1.3229889750918742,
"grad_norm": 0.10518030822277069,
"learning_rate": 1.6646271022275185e-05,
"loss": 0.066,
"step": 2430
},
{
"epoch": 1.3257111746291004,
"grad_norm": 0.13036532700061798,
"learning_rate": 1.6529688297808726e-05,
"loss": 0.078,
"step": 2435
},
{
"epoch": 1.3284333741663263,
"grad_norm": 0.14793965220451355,
"learning_rate": 1.6413313290275355e-05,
"loss": 0.0779,
"step": 2440
},
{
"epoch": 1.3311555737035525,
"grad_norm": 0.15071183443069458,
"learning_rate": 1.629714885353221e-05,
"loss": 0.0835,
"step": 2445
},
{
"epoch": 1.3338777732407785,
"grad_norm": 0.13086527585983276,
"learning_rate": 1.618119783627263e-05,
"loss": 0.0759,
"step": 2450
},
{
"epoch": 1.3365999727780047,
"grad_norm": 0.13066627085208893,
"learning_rate": 1.6065463081956292e-05,
"loss": 0.0745,
"step": 2455
},
{
"epoch": 1.3393221723152307,
"grad_norm": 0.15003521740436554,
"learning_rate": 1.5949947428739448e-05,
"loss": 0.084,
"step": 2460
},
{
"epoch": 1.3420443718524568,
"grad_norm": 0.12341570854187012,
"learning_rate": 1.5834653709405368e-05,
"loss": 0.0707,
"step": 2465
},
{
"epoch": 1.3447665713896828,
"grad_norm": 0.1332896649837494,
"learning_rate": 1.571958475129484e-05,
"loss": 0.0701,
"step": 2470
},
{
"epoch": 1.347488770926909,
"grad_norm": 0.12761184573173523,
"learning_rate": 1.5604743376236847e-05,
"loss": 0.0708,
"step": 2475
},
{
"epoch": 1.350210970464135,
"grad_norm": 0.12873777747154236,
"learning_rate": 1.549013240047937e-05,
"loss": 0.0731,
"step": 2480
},
{
"epoch": 1.3529331700013612,
"grad_norm": 0.1278139352798462,
"learning_rate": 1.537575463462031e-05,
"loss": 0.0712,
"step": 2485
},
{
"epoch": 1.3556553695385871,
"grad_norm": 0.16153199970722198,
"learning_rate": 1.526161288353861e-05,
"loss": 0.0825,
"step": 2490
},
{
"epoch": 1.3583775690758133,
"grad_norm": 0.13839781284332275,
"learning_rate": 1.5147709946325395e-05,
"loss": 0.0769,
"step": 2495
},
{
"epoch": 1.3610997686130393,
"grad_norm": 0.1404309719800949,
"learning_rate": 1.5034048616215402e-05,
"loss": 0.0737,
"step": 2500
},
{
"epoch": 1.3610997686130393,
"eval_loss": 0.07860012352466583,
"eval_runtime": 248.5723,
"eval_samples_per_second": 2.414,
"eval_steps_per_second": 0.603,
"step": 2500
},
{
"epoch": 1.3638219681502655,
"grad_norm": 0.14883238077163696,
"learning_rate": 1.4920631680518432e-05,
"loss": 0.0695,
"step": 2505
},
{
"epoch": 1.3665441676874914,
"grad_norm": 0.14661747217178345,
"learning_rate": 1.4807461920551028e-05,
"loss": 0.0752,
"step": 2510
},
{
"epoch": 1.3692663672247176,
"grad_norm": 0.13054049015045166,
"learning_rate": 1.469454211156826e-05,
"loss": 0.0698,
"step": 2515
},
{
"epoch": 1.3719885667619436,
"grad_norm": 0.13900673389434814,
"learning_rate": 1.4581875022695653e-05,
"loss": 0.0897,
"step": 2520
},
{
"epoch": 1.3747107662991698,
"grad_norm": 0.1521766632795334,
"learning_rate": 1.4469463416861307e-05,
"loss": 0.0751,
"step": 2525
},
{
"epoch": 1.3774329658363957,
"grad_norm": 0.14469105005264282,
"learning_rate": 1.4357310050728115e-05,
"loss": 0.0807,
"step": 2530
},
{
"epoch": 1.380155165373622,
"grad_norm": 0.132577583193779,
"learning_rate": 1.4245417674626183e-05,
"loss": 0.0693,
"step": 2535
},
{
"epoch": 1.382877364910848,
"grad_norm": 0.12606105208396912,
"learning_rate": 1.4133789032485367e-05,
"loss": 0.0739,
"step": 2540
},
{
"epoch": 1.385599564448074,
"grad_norm": 0.1424499899148941,
"learning_rate": 1.4022426861767998e-05,
"loss": 0.0806,
"step": 2545
},
{
"epoch": 1.3883217639853,
"grad_norm": 0.15580779314041138,
"learning_rate": 1.391133389340174e-05,
"loss": 0.0737,
"step": 2550
},
{
"epoch": 1.3910439635225262,
"grad_norm": 0.14672575891017914,
"learning_rate": 1.3800512851712635e-05,
"loss": 0.0746,
"step": 2555
},
{
"epoch": 1.3937661630597522,
"grad_norm": 0.15802086889743805,
"learning_rate": 1.3689966454358256e-05,
"loss": 0.0768,
"step": 2560
},
{
"epoch": 1.3964883625969784,
"grad_norm": 0.1357397735118866,
"learning_rate": 1.3579697412261117e-05,
"loss": 0.0724,
"step": 2565
},
{
"epoch": 1.3992105621342044,
"grad_norm": 0.14769281446933746,
"learning_rate": 1.3469708429542157e-05,
"loss": 0.0738,
"step": 2570
},
{
"epoch": 1.4019327616714305,
"grad_norm": 0.1663895845413208,
"learning_rate": 1.3360002203454442e-05,
"loss": 0.0739,
"step": 2575
},
{
"epoch": 1.4046549612086565,
"grad_norm": 0.13682472705841064,
"learning_rate": 1.325058142431701e-05,
"loss": 0.0732,
"step": 2580
},
{
"epoch": 1.4073771607458827,
"grad_norm": 0.1464831680059433,
"learning_rate": 1.3141448775448875e-05,
"loss": 0.0781,
"step": 2585
},
{
"epoch": 1.4100993602831087,
"grad_norm": 0.14444297552108765,
"learning_rate": 1.3032606933103305e-05,
"loss": 0.0742,
"step": 2590
},
{
"epoch": 1.4128215598203349,
"grad_norm": 0.14417661726474762,
"learning_rate": 1.2924058566402098e-05,
"loss": 0.0761,
"step": 2595
},
{
"epoch": 1.4155437593575608,
"grad_norm": 0.12622784078121185,
"learning_rate": 1.2815806337270186e-05,
"loss": 0.0693,
"step": 2600
},
{
"epoch": 1.418265958894787,
"grad_norm": 0.1415053904056549,
"learning_rate": 1.270785290037031e-05,
"loss": 0.0771,
"step": 2605
},
{
"epoch": 1.4209881584320132,
"grad_norm": 0.14631399512290955,
"learning_rate": 1.260020090303797e-05,
"loss": 0.1018,
"step": 2610
},
{
"epoch": 1.4237103579692392,
"grad_norm": 0.1438084840774536,
"learning_rate": 1.2492852985216483e-05,
"loss": 0.0708,
"step": 2615
},
{
"epoch": 1.4264325575064651,
"grad_norm": 0.12973198294639587,
"learning_rate": 1.2385811779392236e-05,
"loss": 0.0795,
"step": 2620
},
{
"epoch": 1.4291547570436913,
"grad_norm": 0.15162228047847748,
"learning_rate": 1.2279079910530147e-05,
"loss": 0.0811,
"step": 2625
},
{
"epoch": 1.4318769565809175,
"grad_norm": 0.14159157872200012,
"learning_rate": 1.2172659996009254e-05,
"loss": 0.0762,
"step": 2630
},
{
"epoch": 1.4345991561181435,
"grad_norm": 0.13606242835521698,
"learning_rate": 1.2066554645558578e-05,
"loss": 0.0739,
"step": 2635
},
{
"epoch": 1.4373213556553694,
"grad_norm": 0.15046873688697815,
"learning_rate": 1.1960766461193124e-05,
"loss": 0.0663,
"step": 2640
},
{
"epoch": 1.4400435551925956,
"grad_norm": 0.12745556235313416,
"learning_rate": 1.1855298037150022e-05,
"loss": 0.073,
"step": 2645
},
{
"epoch": 1.4427657547298218,
"grad_norm": 0.156600683927536,
"learning_rate": 1.1750151959824961e-05,
"loss": 0.0746,
"step": 2650
},
{
"epoch": 1.4454879542670478,
"grad_norm": 0.13598720729351044,
"learning_rate": 1.1645330807708713e-05,
"loss": 0.0687,
"step": 2655
},
{
"epoch": 1.4482101538042738,
"grad_norm": 0.13264238834381104,
"learning_rate": 1.1540837151323951e-05,
"loss": 0.0814,
"step": 2660
},
{
"epoch": 1.4509323533415,
"grad_norm": 0.13126635551452637,
"learning_rate": 1.143667355316219e-05,
"loss": 0.0768,
"step": 2665
},
{
"epoch": 1.4536545528787261,
"grad_norm": 0.14652326703071594,
"learning_rate": 1.1332842567620941e-05,
"loss": 0.0731,
"step": 2670
},
{
"epoch": 1.456376752415952,
"grad_norm": 0.14913715422153473,
"learning_rate": 1.1229346740941088e-05,
"loss": 0.0893,
"step": 2675
},
{
"epoch": 1.459098951953178,
"grad_norm": 0.15281103551387787,
"learning_rate": 1.1126188611144406e-05,
"loss": 0.0981,
"step": 2680
},
{
"epoch": 1.4618211514904043,
"grad_norm": 0.14536678791046143,
"learning_rate": 1.102337070797137e-05,
"loss": 0.0775,
"step": 2685
},
{
"epoch": 1.4645433510276304,
"grad_norm": 0.14888562262058258,
"learning_rate": 1.0920895552819118e-05,
"loss": 0.0828,
"step": 2690
},
{
"epoch": 1.4672655505648564,
"grad_norm": 0.12814417481422424,
"learning_rate": 1.0818765658679576e-05,
"loss": 0.0766,
"step": 2695
},
{
"epoch": 1.4699877501020824,
"grad_norm": 0.12727950513362885,
"learning_rate": 1.0716983530077843e-05,
"loss": 0.0776,
"step": 2700
},
{
"epoch": 1.4727099496393086,
"grad_norm": 0.12727922201156616,
"learning_rate": 1.0615551663010806e-05,
"loss": 0.0689,
"step": 2705
},
{
"epoch": 1.4754321491765348,
"grad_norm": 0.14102789759635925,
"learning_rate": 1.051447254488591e-05,
"loss": 0.0634,
"step": 2710
},
{
"epoch": 1.4781543487137607,
"grad_norm": 0.14436770975589752,
"learning_rate": 1.0413748654460149e-05,
"loss": 0.0796,
"step": 2715
},
{
"epoch": 1.4808765482509867,
"grad_norm": 0.1291683316230774,
"learning_rate": 1.0313382461779306e-05,
"loss": 0.0755,
"step": 2720
},
{
"epoch": 1.4835987477882129,
"grad_norm": 0.12600617110729218,
"learning_rate": 1.0213376428117333e-05,
"loss": 0.0658,
"step": 2725
},
{
"epoch": 1.486320947325439,
"grad_norm": 0.14813588559627533,
"learning_rate": 1.0113733005916057e-05,
"loss": 0.075,
"step": 2730
},
{
"epoch": 1.489043146862665,
"grad_norm": 0.15321391820907593,
"learning_rate": 1.0014454638724982e-05,
"loss": 0.0774,
"step": 2735
},
{
"epoch": 1.491765346399891,
"grad_norm": 0.13844044506549835,
"learning_rate": 9.915543761141432e-06,
"loss": 0.074,
"step": 2740
},
{
"epoch": 1.4944875459371172,
"grad_norm": 0.14296895265579224,
"learning_rate": 9.81700279875075e-06,
"loss": 0.0796,
"step": 2745
},
{
"epoch": 1.4972097454743434,
"grad_norm": 0.14961597323417664,
"learning_rate": 9.718834168066904e-06,
"loss": 0.0713,
"step": 2750
},
{
"epoch": 1.4999319450115693,
"grad_norm": 0.1663280427455902,
"learning_rate": 9.62104027647319e-06,
"loss": 0.077,
"step": 2755
},
{
"epoch": 1.5026541445487953,
"grad_norm": 0.14651760458946228,
"learning_rate": 9.523623522163197e-06,
"loss": 0.0719,
"step": 2760
},
{
"epoch": 1.5053763440860215,
"grad_norm": 0.14970749616622925,
"learning_rate": 9.426586294082013e-06,
"loss": 0.0776,
"step": 2765
},
{
"epoch": 1.5080985436232477,
"grad_norm": 0.15019264817237854,
"learning_rate": 9.329930971867595e-06,
"loss": 0.0771,
"step": 2770
},
{
"epoch": 1.5108207431604737,
"grad_norm": 0.14440083503723145,
"learning_rate": 9.233659925792477e-06,
"loss": 0.072,
"step": 2775
},
{
"epoch": 1.5135429426976996,
"grad_norm": 0.138129323720932,
"learning_rate": 9.137775516705604e-06,
"loss": 0.0758,
"step": 2780
},
{
"epoch": 1.5162651422349258,
"grad_norm": 0.14640666544437408,
"learning_rate": 9.042280095974434e-06,
"loss": 0.0844,
"step": 2785
},
{
"epoch": 1.518987341772152,
"grad_norm": 0.14141102135181427,
"learning_rate": 8.947176005427324e-06,
"loss": 0.0798,
"step": 2790
},
{
"epoch": 1.521709541309378,
"grad_norm": 0.14003820717334747,
"learning_rate": 8.852465577296015e-06,
"loss": 0.0761,
"step": 2795
},
{
"epoch": 1.524431740846604,
"grad_norm": 0.13643573224544525,
"learning_rate": 8.75815113415852e-06,
"loss": 0.0753,
"step": 2800
},
{
"epoch": 1.5271539403838301,
"grad_norm": 0.13099689781665802,
"learning_rate": 8.66423498888213e-06,
"loss": 0.0764,
"step": 2805
},
{
"epoch": 1.5298761399210563,
"grad_norm": 0.16020213067531586,
"learning_rate": 8.570719444566702e-06,
"loss": 0.0733,
"step": 2810
},
{
"epoch": 1.5325983394582823,
"grad_norm": 0.1546473205089569,
"learning_rate": 8.477606794488183e-06,
"loss": 0.0839,
"step": 2815
},
{
"epoch": 1.5353205389955082,
"grad_norm": 0.14650870859622955,
"learning_rate": 8.384899322042356e-06,
"loss": 0.0837,
"step": 2820
},
{
"epoch": 1.5380427385327344,
"grad_norm": 0.11506952345371246,
"learning_rate": 8.29259930068887e-06,
"loss": 0.0775,
"step": 2825
},
{
"epoch": 1.5407649380699606,
"grad_norm": 0.14615637063980103,
"learning_rate": 8.200708993895476e-06,
"loss": 0.0744,
"step": 2830
},
{
"epoch": 1.5434871376071866,
"grad_norm": 0.13769914209842682,
"learning_rate": 8.109230655082517e-06,
"loss": 0.0795,
"step": 2835
},
{
"epoch": 1.5462093371444126,
"grad_norm": 0.13748113811016083,
"learning_rate": 8.018166527567672e-06,
"loss": 0.0675,
"step": 2840
},
{
"epoch": 1.5489315366816387,
"grad_norm": 0.15049228072166443,
"learning_rate": 7.927518844510941e-06,
"loss": 0.0703,
"step": 2845
},
{
"epoch": 1.551653736218865,
"grad_norm": 0.13132914900779724,
"learning_rate": 7.837289828859884e-06,
"loss": 0.0742,
"step": 2850
},
{
"epoch": 1.554375935756091,
"grad_norm": 0.14202824234962463,
"learning_rate": 7.7474816932951e-06,
"loss": 0.0699,
"step": 2855
},
{
"epoch": 1.5570981352933169,
"grad_norm": 0.16140979528427124,
"learning_rate": 7.658096640175985e-06,
"loss": 0.0777,
"step": 2860
},
{
"epoch": 1.559820334830543,
"grad_norm": 0.1300312578678131,
"learning_rate": 7.569136861486686e-06,
"loss": 0.0648,
"step": 2865
},
{
"epoch": 1.5625425343677692,
"grad_norm": 0.14492963254451752,
"learning_rate": 7.4806045387823935e-06,
"loss": 0.0713,
"step": 2870
},
{
"epoch": 1.5652647339049952,
"grad_norm": 0.13518249988555908,
"learning_rate": 7.392501843135816e-06,
"loss": 0.0649,
"step": 2875
},
{
"epoch": 1.5679869334422212,
"grad_norm": 0.14563652873039246,
"learning_rate": 7.304830935083934e-06,
"loss": 0.0724,
"step": 2880
},
{
"epoch": 1.5707091329794474,
"grad_norm": 0.1495407372713089,
"learning_rate": 7.2175939645750454e-06,
"loss": 0.0695,
"step": 2885
},
{
"epoch": 1.5734313325166736,
"grad_norm": 0.15107296407222748,
"learning_rate": 7.130793070916006e-06,
"loss": 0.0762,
"step": 2890
},
{
"epoch": 1.5761535320538995,
"grad_norm": 0.13882042467594147,
"learning_rate": 7.0444303827197994e-06,
"loss": 0.0698,
"step": 2895
},
{
"epoch": 1.5788757315911255,
"grad_norm": 0.15497738122940063,
"learning_rate": 6.9585080178533155e-06,
"loss": 0.0729,
"step": 2900
},
{
"epoch": 1.5815979311283517,
"grad_norm": 0.14168910682201385,
"learning_rate": 6.873028083385435e-06,
"loss": 0.0681,
"step": 2905
},
{
"epoch": 1.5843201306655779,
"grad_norm": 0.1269499659538269,
"learning_rate": 6.787992675535318e-06,
"loss": 0.0803,
"step": 2910
},
{
"epoch": 1.5870423302028038,
"grad_norm": 0.14480414986610413,
"learning_rate": 6.703403879621048e-06,
"loss": 0.0719,
"step": 2915
},
{
"epoch": 1.5897645297400298,
"grad_norm": 0.14816336333751678,
"learning_rate": 6.619263770008471e-06,
"loss": 0.0764,
"step": 2920
},
{
"epoch": 1.592486729277256,
"grad_norm": 0.13959960639476776,
"learning_rate": 6.535574410060321e-06,
"loss": 0.0777,
"step": 2925
},
{
"epoch": 1.5952089288144822,
"grad_norm": 0.13289447128772736,
"learning_rate": 6.452337852085621e-06,
"loss": 0.077,
"step": 2930
},
{
"epoch": 1.5979311283517081,
"grad_norm": 0.14249089360237122,
"learning_rate": 6.369556137289373e-06,
"loss": 0.0678,
"step": 2935
},
{
"epoch": 1.600653327888934,
"grad_norm": 0.14189301431179047,
"learning_rate": 6.28723129572247e-06,
"loss": 0.077,
"step": 2940
},
{
"epoch": 1.6033755274261603,
"grad_norm": 0.13121193647384644,
"learning_rate": 6.2053653462319475e-06,
"loss": 0.0651,
"step": 2945
},
{
"epoch": 1.6060977269633865,
"grad_norm": 0.13624070584774017,
"learning_rate": 6.123960296411449e-06,
"loss": 0.0726,
"step": 2950
},
{
"epoch": 1.6088199265006125,
"grad_norm": 0.1546383649110794,
"learning_rate": 6.043018142552009e-06,
"loss": 0.0747,
"step": 2955
},
{
"epoch": 1.6115421260378384,
"grad_norm": 0.14421793818473816,
"learning_rate": 5.962540869593081e-06,
"loss": 0.0818,
"step": 2960
},
{
"epoch": 1.6142643255750646,
"grad_norm": 0.13135267794132233,
"learning_rate": 5.882530451073886e-06,
"loss": 0.0724,
"step": 2965
},
{
"epoch": 1.6169865251122908,
"grad_norm": 0.14589843153953552,
"learning_rate": 5.8029888490850005e-06,
"loss": 0.0769,
"step": 2970
},
{
"epoch": 1.6197087246495168,
"grad_norm": 0.1423359513282776,
"learning_rate": 5.723918014220236e-06,
"loss": 0.0662,
"step": 2975
},
{
"epoch": 1.6224309241867427,
"grad_norm": 0.12941160798072815,
"learning_rate": 5.645319885528824e-06,
"loss": 0.0938,
"step": 2980
},
{
"epoch": 1.625153123723969,
"grad_norm": 0.16445757448673248,
"learning_rate": 5.5671963904678185e-06,
"loss": 0.0752,
"step": 2985
},
{
"epoch": 1.627875323261195,
"grad_norm": 0.15879502892494202,
"learning_rate": 5.489549444854908e-06,
"loss": 0.0875,
"step": 2990
},
{
"epoch": 1.630597522798421,
"grad_norm": 0.1418047547340393,
"learning_rate": 5.412380952821358e-06,
"loss": 0.0778,
"step": 2995
},
{
"epoch": 1.633319722335647,
"grad_norm": 0.1362149566411972,
"learning_rate": 5.33569280676536e-06,
"loss": 0.0692,
"step": 3000
},
{
"epoch": 1.633319722335647,
"eval_loss": 0.07731131464242935,
"eval_runtime": 262.646,
"eval_samples_per_second": 2.284,
"eval_steps_per_second": 0.571,
"step": 3000
},
{
"epoch": 1.6360419218728732,
"grad_norm": 0.14646309614181519,
"learning_rate": 5.259486887305592e-06,
"loss": 0.068,
"step": 3005
},
{
"epoch": 1.6387641214100994,
"grad_norm": 0.13968642055988312,
"learning_rate": 5.183765063235138e-06,
"loss": 0.0727,
"step": 3010
},
{
"epoch": 1.6414863209473254,
"grad_norm": 0.1375684291124344,
"learning_rate": 5.108529191475636e-06,
"loss": 0.0794,
"step": 3015
},
{
"epoch": 1.6442085204845516,
"grad_norm": 0.1326666921377182,
"learning_rate": 5.033781117031738e-06,
"loss": 0.072,
"step": 3020
},
{
"epoch": 1.6469307200217775,
"grad_norm": 0.14453214406967163,
"learning_rate": 4.959522672945891e-06,
"loss": 0.0763,
"step": 3025
},
{
"epoch": 1.6496529195590037,
"grad_norm": 0.15132352709770203,
"learning_rate": 4.885755680253334e-06,
"loss": 0.1175,
"step": 3030
},
{
"epoch": 1.65237511909623,
"grad_norm": 0.15359079837799072,
"learning_rate": 4.812481947937497e-06,
"loss": 0.0837,
"step": 3035
},
{
"epoch": 1.6550973186334559,
"grad_norm": 0.13241231441497803,
"learning_rate": 4.739703272885626e-06,
"loss": 0.0738,
"step": 3040
},
{
"epoch": 1.6578195181706818,
"grad_norm": 0.14873500168323517,
"learning_rate": 4.667421439844691e-06,
"loss": 0.0708,
"step": 3045
},
{
"epoch": 1.660541717707908,
"grad_norm": 0.13607364892959595,
"learning_rate": 4.5956382213776295e-06,
"loss": 0.1151,
"step": 3050
},
{
"epoch": 1.6632639172451342,
"grad_norm": 0.15463878214359283,
"learning_rate": 4.524355377819897e-06,
"loss": 0.0831,
"step": 3055
},
{
"epoch": 1.6659861167823602,
"grad_norm": 0.1272389441728592,
"learning_rate": 4.45357465723629e-06,
"loss": 0.0714,
"step": 3060
},
{
"epoch": 1.6687083163195862,
"grad_norm": 0.14218097925186157,
"learning_rate": 4.383297795378061e-06,
"loss": 0.0675,
"step": 3065
},
{
"epoch": 1.6714305158568123,
"grad_norm": 0.1373002678155899,
"learning_rate": 4.3135265156403756e-06,
"loss": 0.0791,
"step": 3070
},
{
"epoch": 1.6741527153940385,
"grad_norm": 0.16303496062755585,
"learning_rate": 4.244262529020041e-06,
"loss": 0.0757,
"step": 3075
},
{
"epoch": 1.6768749149312645,
"grad_norm": 0.1444956660270691,
"learning_rate": 4.175507534073533e-06,
"loss": 0.0732,
"step": 3080
},
{
"epoch": 1.6795971144684905,
"grad_norm": 0.1505775898694992,
"learning_rate": 4.107263216875362e-06,
"loss": 0.0751,
"step": 3085
},
{
"epoch": 1.6823193140057167,
"grad_norm": 0.14044207334518433,
"learning_rate": 4.039531250976735e-06,
"loss": 0.069,
"step": 3090
},
{
"epoch": 1.6850415135429428,
"grad_norm": 0.14999620616436005,
"learning_rate": 3.972313297364494e-06,
"loss": 0.0708,
"step": 3095
},
{
"epoch": 1.6877637130801688,
"grad_norm": 0.1265767216682434,
"learning_rate": 3.90561100442036e-06,
"loss": 0.0655,
"step": 3100
},
{
"epoch": 1.6904859126173948,
"grad_norm": 0.16221390664577484,
"learning_rate": 3.8394260078805755e-06,
"loss": 0.0736,
"step": 3105
},
{
"epoch": 1.693208112154621,
"grad_norm": 0.15090176463127136,
"learning_rate": 3.7737599307957365e-06,
"loss": 0.0669,
"step": 3110
},
{
"epoch": 1.6959303116918472,
"grad_norm": 0.14406718313694,
"learning_rate": 3.708614383491016e-06,
"loss": 0.0737,
"step": 3115
},
{
"epoch": 1.6986525112290731,
"grad_norm": 0.12115464359521866,
"learning_rate": 3.643990963526672e-06,
"loss": 0.0627,
"step": 3120
},
{
"epoch": 1.701374710766299,
"grad_norm": 0.16066431999206543,
"learning_rate": 3.579891255658846e-06,
"loss": 0.0757,
"step": 3125
},
{
"epoch": 1.7040969103035253,
"grad_norm": 0.13997657597064972,
"learning_rate": 3.5163168318007365e-06,
"loss": 0.1398,
"step": 3130
},
{
"epoch": 1.7068191098407515,
"grad_norm": 0.13934873044490814,
"learning_rate": 3.4532692509840313e-06,
"loss": 0.0723,
"step": 3135
},
{
"epoch": 1.7095413093779774,
"grad_norm": 0.13022823631763458,
"learning_rate": 3.390750059320688e-06,
"loss": 0.0789,
"step": 3140
},
{
"epoch": 1.7122635089152034,
"grad_norm": 0.13600829243659973,
"learning_rate": 3.328760789964988e-06,
"loss": 0.0728,
"step": 3145
},
{
"epoch": 1.7149857084524296,
"grad_norm": 0.1362736076116562,
"learning_rate": 3.2673029630759745e-06,
"loss": 0.0694,
"step": 3150
},
{
"epoch": 1.7177079079896558,
"grad_norm": 0.14168758690357208,
"learning_rate": 3.2063780857801596e-06,
"loss": 0.0726,
"step": 3155
},
{
"epoch": 1.7204301075268817,
"grad_norm": 0.14045333862304688,
"learning_rate": 3.145987652134563e-06,
"loss": 0.0666,
"step": 3160
},
{
"epoch": 1.7231523070641077,
"grad_norm": 0.15181277692317963,
"learning_rate": 3.0861331430900807e-06,
"loss": 0.0788,
"step": 3165
},
{
"epoch": 1.725874506601334,
"grad_norm": 0.14058373868465424,
"learning_rate": 3.0268160264551484e-06,
"loss": 0.0701,
"step": 3170
},
{
"epoch": 1.72859670613856,
"grad_norm": 0.1474706381559372,
"learning_rate": 2.968037756859776e-06,
"loss": 0.0745,
"step": 3175
},
{
"epoch": 1.731318905675786,
"grad_norm": 0.13882222771644592,
"learning_rate": 2.9097997757198516e-06,
"loss": 0.0717,
"step": 3180
},
{
"epoch": 1.734041105213012,
"grad_norm": 0.14754945039749146,
"learning_rate": 2.8521035112018063e-06,
"loss": 0.0644,
"step": 3185
},
{
"epoch": 1.7367633047502382,
"grad_norm": 0.1717384308576584,
"learning_rate": 2.7949503781875912e-06,
"loss": 0.0768,
"step": 3190
},
{
"epoch": 1.7394855042874644,
"grad_norm": 0.1543627679347992,
"learning_rate": 2.7383417782399693e-06,
"loss": 0.0709,
"step": 3195
},
{
"epoch": 1.7422077038246904,
"grad_norm": 0.1619829684495926,
"learning_rate": 2.6822790995681577e-06,
"loss": 0.0814,
"step": 3200
},
{
"epoch": 1.7449299033619163,
"grad_norm": 0.13856656849384308,
"learning_rate": 2.62676371699378e-06,
"loss": 0.0701,
"step": 3205
},
{
"epoch": 1.7476521028991425,
"grad_norm": 0.14767934381961823,
"learning_rate": 2.5717969919171553e-06,
"loss": 0.076,
"step": 3210
},
{
"epoch": 1.7503743024363687,
"grad_norm": 0.1731363832950592,
"learning_rate": 2.5173802722838867e-06,
"loss": 0.0711,
"step": 3215
},
{
"epoch": 1.7530965019735947,
"grad_norm": 0.14000795781612396,
"learning_rate": 2.4635148925518577e-06,
"loss": 0.0675,
"step": 3220
},
{
"epoch": 1.7558187015108206,
"grad_norm": 0.14177638292312622,
"learning_rate": 2.4102021736584575e-06,
"loss": 0.0659,
"step": 3225
},
{
"epoch": 1.7585409010480468,
"grad_norm": 0.1450289785861969,
"learning_rate": 2.3574434229882145e-06,
"loss": 0.0836,
"step": 3230
},
{
"epoch": 1.761263100585273,
"grad_norm": 0.15300573408603668,
"learning_rate": 2.3052399343407277e-06,
"loss": 0.079,
"step": 3235
},
{
"epoch": 1.763985300122499,
"grad_norm": 0.12552043795585632,
"learning_rate": 2.253592987898942e-06,
"loss": 0.075,
"step": 3240
},
{
"epoch": 1.766707499659725,
"grad_norm": 0.13973963260650635,
"learning_rate": 2.2025038501977486e-06,
"loss": 0.075,
"step": 3245
},
{
"epoch": 1.7694296991969511,
"grad_norm": 0.14422014355659485,
"learning_rate": 2.1519737740929274e-06,
"loss": 0.0847,
"step": 3250
},
{
"epoch": 1.7721518987341773,
"grad_norm": 0.15774239599704742,
"learning_rate": 2.1020039987304285e-06,
"loss": 0.0852,
"step": 3255
},
{
"epoch": 1.7748740982714033,
"grad_norm": 0.14551898837089539,
"learning_rate": 2.05259574951599e-06,
"loss": 0.0726,
"step": 3260
},
{
"epoch": 1.7775962978086293,
"grad_norm": 0.1303868442773819,
"learning_rate": 2.003750238085053e-06,
"loss": 0.076,
"step": 3265
},
{
"epoch": 1.7803184973458555,
"grad_norm": 0.16554243862628937,
"learning_rate": 1.9554686622730993e-06,
"loss": 0.0992,
"step": 3270
},
{
"epoch": 1.7830406968830816,
"grad_norm": 0.15280041098594666,
"learning_rate": 1.907752206086247e-06,
"loss": 0.0819,
"step": 3275
},
{
"epoch": 1.7857628964203076,
"grad_norm": 0.14237718284130096,
"learning_rate": 1.8606020396722129e-06,
"loss": 0.0726,
"step": 3280
},
{
"epoch": 1.7884850959575336,
"grad_norm": 0.15816614031791687,
"learning_rate": 1.814019319291635e-06,
"loss": 0.0754,
"step": 3285
},
{
"epoch": 1.7912072954947598,
"grad_norm": 0.13710108399391174,
"learning_rate": 1.7680051872896869e-06,
"loss": 0.0658,
"step": 3290
},
{
"epoch": 1.793929495031986,
"grad_norm": 0.15852783620357513,
"learning_rate": 1.7225607720681131e-06,
"loss": 0.0732,
"step": 3295
},
{
"epoch": 1.796651694569212,
"grad_norm": 0.15130798518657684,
"learning_rate": 1.6776871880575084e-06,
"loss": 0.0833,
"step": 3300
},
{
"epoch": 1.7993738941064379,
"grad_norm": 0.15700189769268036,
"learning_rate": 1.6333855356900185e-06,
"loss": 0.1652,
"step": 3305
},
{
"epoch": 1.802096093643664,
"grad_norm": 0.14298690855503082,
"learning_rate": 1.5896569013723329e-06,
"loss": 0.0797,
"step": 3310
},
{
"epoch": 1.8048182931808903,
"grad_norm": 0.14160804450511932,
"learning_rate": 1.5465023574590676e-06,
"loss": 0.0757,
"step": 3315
},
{
"epoch": 1.8075404927181162,
"grad_norm": 0.15241742134094238,
"learning_rate": 1.5039229622264479e-06,
"loss": 0.0781,
"step": 3320
},
{
"epoch": 1.8102626922553422,
"grad_norm": 0.15816597640514374,
"learning_rate": 1.4619197598463641e-06,
"loss": 0.0764,
"step": 3325
},
{
"epoch": 1.8129848917925684,
"grad_norm": 0.15082836151123047,
"learning_rate": 1.420493780360771e-06,
"loss": 0.0708,
"step": 3330
},
{
"epoch": 1.8157070913297946,
"grad_norm": 0.13604667782783508,
"learning_rate": 1.3796460396564098e-06,
"loss": 0.0711,
"step": 3335
},
{
"epoch": 1.8184292908670205,
"grad_norm": 0.14523722231388092,
"learning_rate": 1.3393775394399123e-06,
"loss": 0.0797,
"step": 3340
},
{
"epoch": 1.8211514904042465,
"grad_norm": 0.14779233932495117,
"learning_rate": 1.2996892672132416e-06,
"loss": 0.0845,
"step": 3345
},
{
"epoch": 1.8238736899414727,
"grad_norm": 0.13566653430461884,
"learning_rate": 1.2605821962494574e-06,
"loss": 0.1118,
"step": 3350
},
{
"epoch": 1.8265958894786989,
"grad_norm": 0.15038765966892242,
"learning_rate": 1.2220572855688507e-06,
"loss": 0.0782,
"step": 3355
},
{
"epoch": 1.8293180890159249,
"grad_norm": 0.14022061228752136,
"learning_rate": 1.1841154799154374e-06,
"loss": 0.0737,
"step": 3360
},
{
"epoch": 1.8320402885531508,
"grad_norm": 0.14298531413078308,
"learning_rate": 1.1467577097337867e-06,
"loss": 0.0939,
"step": 3365
},
{
"epoch": 1.834762488090377,
"grad_norm": 0.15060581266880035,
"learning_rate": 1.1099848911462014e-06,
"loss": 0.0752,
"step": 3370
},
{
"epoch": 1.8374846876276032,
"grad_norm": 0.1449151188135147,
"learning_rate": 1.0737979259302478e-06,
"loss": 0.0688,
"step": 3375
},
{
"epoch": 1.8402068871648292,
"grad_norm": 0.13491316139698029,
"learning_rate": 1.0381977014966543e-06,
"loss": 0.0996,
"step": 3380
},
{
"epoch": 1.8429290867020551,
"grad_norm": 0.128363698720932,
"learning_rate": 1.0031850908675283e-06,
"loss": 0.1217,
"step": 3385
},
{
"epoch": 1.8456512862392813,
"grad_norm": 0.14816632866859436,
"learning_rate": 9.68760952654968e-07,
"loss": 0.0653,
"step": 3390
},
{
"epoch": 1.8483734857765075,
"grad_norm": 0.16936026513576508,
"learning_rate": 9.349261310400037e-07,
"loss": 0.0735,
"step": 3395
},
{
"epoch": 1.8510956853137335,
"grad_norm": 0.14492131769657135,
"learning_rate": 9.016814557518849e-07,
"loss": 0.0683,
"step": 3400
},
{
"epoch": 1.8538178848509594,
"grad_norm": 0.14610859751701355,
"learning_rate": 8.690277420477372e-07,
"loss": 0.079,
"step": 3405
},
{
"epoch": 1.8565400843881856,
"grad_norm": 0.1451197862625122,
"learning_rate": 8.369657906925732e-07,
"loss": 0.0753,
"step": 3410
},
{
"epoch": 1.8592622839254118,
"grad_norm": 0.14033135771751404,
"learning_rate": 8.054963879396554e-07,
"loss": 0.0646,
"step": 3415
},
{
"epoch": 1.8619844834626378,
"grad_norm": 0.13456685841083527,
"learning_rate": 7.746203055112145e-07,
"loss": 0.1086,
"step": 3420
},
{
"epoch": 1.8647066829998638,
"grad_norm": 0.15288980305194855,
"learning_rate": 7.443383005795224e-07,
"loss": 0.0946,
"step": 3425
},
{
"epoch": 1.86742888253709,
"grad_norm": 0.14935944974422455,
"learning_rate": 7.146511157483216e-07,
"loss": 0.0829,
"step": 3430
},
{
"epoch": 1.8701510820743161,
"grad_norm": 0.14166168868541718,
"learning_rate": 6.85559479034617e-07,
"loss": 0.0679,
"step": 3435
},
{
"epoch": 1.872873281611542,
"grad_norm": 0.14961621165275574,
"learning_rate": 6.570641038508296e-07,
"loss": 0.0762,
"step": 3440
},
{
"epoch": 1.875595481148768,
"grad_norm": 0.1400919258594513,
"learning_rate": 6.291656889873016e-07,
"loss": 0.0803,
"step": 3445
},
{
"epoch": 1.8783176806859943,
"grad_norm": 0.13169647753238678,
"learning_rate": 6.018649185951325e-07,
"loss": 0.0707,
"step": 3450
},
{
"epoch": 1.8810398802232204,
"grad_norm": 0.13241390883922577,
"learning_rate": 5.751624621694429e-07,
"loss": 0.0636,
"step": 3455
},
{
"epoch": 1.8837620797604464,
"grad_norm": 0.16256216168403625,
"learning_rate": 5.490589745329261e-07,
"loss": 0.0774,
"step": 3460
},
{
"epoch": 1.8864842792976724,
"grad_norm": 0.15392127633094788,
"learning_rate": 5.235550958198083e-07,
"loss": 0.073,
"step": 3465
},
{
"epoch": 1.8892064788348986,
"grad_norm": 0.14630888402462006,
"learning_rate": 4.986514514601415e-07,
"loss": 0.0742,
"step": 3470
},
{
"epoch": 1.8919286783721248,
"grad_norm": 0.16385363042354584,
"learning_rate": 4.74348652164458e-07,
"loss": 0.083,
"step": 3475
},
{
"epoch": 1.8946508779093507,
"grad_norm": 0.16106607019901276,
"learning_rate": 4.5064729390881246e-07,
"loss": 0.0755,
"step": 3480
},
{
"epoch": 1.8973730774465767,
"grad_norm": 0.13656839728355408,
"learning_rate": 4.275479579201602e-07,
"loss": 0.0703,
"step": 3485
},
{
"epoch": 1.9000952769838029,
"grad_norm": 0.12616074085235596,
"learning_rate": 4.050512106620913e-07,
"loss": 0.0692,
"step": 3490
},
{
"epoch": 1.902817476521029,
"grad_norm": 0.15891359746456146,
"learning_rate": 3.8315760382096057e-07,
"loss": 0.0794,
"step": 3495
},
{
"epoch": 1.905539676058255,
"grad_norm": 0.1297856718301773,
"learning_rate": 3.6186767429234323e-07,
"loss": 0.0671,
"step": 3500
},
{
"epoch": 1.905539676058255,
"eval_loss": 0.07686587423086166,
"eval_runtime": 218.8338,
"eval_samples_per_second": 2.742,
"eval_steps_per_second": 0.685,
"step": 3500
},
{ |
|
"epoch": 1.908261875595481, |
|
"grad_norm": 0.16892676055431366, |
|
"learning_rate": 3.4118194416786764e-07, |
|
"loss": 0.076, |
|
"step": 3505 |
|
}, |
|
{ |
|
"epoch": 1.9109840751327072, |
|
"grad_norm": 0.14325669407844543, |
|
"learning_rate": 3.211009207224225e-07, |
|
"loss": 0.0723, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 1.9137062746699334, |
|
"grad_norm": 0.12773250043392181, |
|
"learning_rate": 3.016250964017142e-07, |
|
"loss": 0.0727, |
|
"step": 3515 |
|
}, |
|
{ |
|
"epoch": 1.9164284742071593, |
|
"grad_norm": 0.15402384102344513, |
|
"learning_rate": 2.827549488101849e-07, |
|
"loss": 0.0699, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 1.9191506737443853, |
|
"grad_norm": 0.13994073867797852, |
|
"learning_rate": 2.644909406992996e-07, |
|
"loss": 0.0674, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 1.9218728732816115, |
|
"grad_norm": 0.15044575929641724, |
|
"learning_rate": 2.468335199562105e-07, |
|
"loss": 0.0779, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 1.9245950728188377, |
|
"grad_norm": 0.13634447753429413, |
|
"learning_rate": 2.29783119592758e-07, |
|
"loss": 0.0697, |
|
"step": 3535 |
|
}, |
|
{ |
|
"epoch": 1.9273172723560636, |
|
"grad_norm": 0.1296970546245575, |
|
"learning_rate": 2.1334015773486203e-07, |
|
"loss": 0.0682, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 1.9300394718932896, |
|
"grad_norm": 0.14532914757728577, |
|
"learning_rate": 1.975050376122667e-07, |
|
"loss": 0.0787, |
|
"step": 3545 |
|
}, |
|
{ |
|
"epoch": 1.9327616714305158, |
|
"grad_norm": 0.15663999319076538, |
|
"learning_rate": 1.8227814754865068e-07, |
|
"loss": 0.0735, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.935483870967742, |
|
"grad_norm": 0.13645371794700623, |
|
"learning_rate": 1.6765986095209908e-07, |
|
"loss": 0.0684, |
|
"step": 3555 |
|
}, |
|
{ |
|
"epoch": 1.938206070504968, |
|
"grad_norm": 0.14688655734062195, |
|
"learning_rate": 1.536505363059576e-07, |
|
"loss": 0.0738, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 1.9409282700421941, |
|
"grad_norm": 0.14657242596149445, |
|
"learning_rate": 1.4025051716003157e-07, |
|
"loss": 0.083, |
|
"step": 3565 |
|
}, |
|
{ |
|
"epoch": 1.9436504695794201, |
|
"grad_norm": 0.1277332305908203, |
|
"learning_rate": 1.2746013212217022e-07, |
|
"loss": 0.0758, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 1.9463726691166463, |
|
"grad_norm": 0.1419767141342163, |
|
"learning_rate": 1.1527969485019275e-07, |
|
"loss": 0.0755, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 1.9490948686538725, |
|
"grad_norm": 0.15132427215576172, |
|
"learning_rate": 1.0370950404421931e-07, |
|
"loss": 0.0651, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 1.9518170681910985, |
|
"grad_norm": 0.1558247208595276, |
|
"learning_rate": 9.274984343932702e-08, |
|
"loss": 0.0785, |
|
"step": 3585 |
|
}, |
|
{ |
|
"epoch": 1.9545392677283244, |
|
"grad_norm": 0.1476377248764038, |
|
"learning_rate": 8.240098179859712e-08, |
|
"loss": 0.0826, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 1.9572614672655506, |
|
"grad_norm": 0.14471116662025452, |
|
"learning_rate": 7.26631729065258e-08, |
|
"loss": 0.0816, |
|
"step": 3595 |
|
}, |
|
{ |
|
"epoch": 1.9599836668027768, |
|
"grad_norm": 0.14453163743019104, |
|
"learning_rate": 6.353665556280697e-08, |
|
"loss": 0.0699, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.9627058663400028, |
|
"grad_norm": 0.13174843788146973, |
|
"learning_rate": 5.5021653576459164e-08, |
|
"loss": 0.062, |
|
"step": 3605 |
|
}, |
|
{ |
|
"epoch": 1.9654280658772287, |
|
"grad_norm": 0.1312907189130783, |
|
"learning_rate": 4.7118375760357716e-08, |
|
"loss": 0.0804, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 1.968150265414455, |
|
"grad_norm": 0.15283526480197906, |
|
"learning_rate": 3.982701592609439e-08, |
|
"loss": 0.1359, |
|
"step": 3615 |
|
}, |
|
{ |
|
"epoch": 1.9708724649516811, |
|
"grad_norm": 0.14205031096935272, |
|
"learning_rate": 3.314775287923677e-08, |
|
"loss": 0.071, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 1.973594664488907, |
|
"grad_norm": 0.16160933673381805, |
|
"learning_rate": 2.708075041494562e-08, |
|
"loss": 0.0784, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 1.976316864026133, |
|
"grad_norm": 0.12819606065750122, |
|
"learning_rate": 2.1626157313950345e-08, |
|
"loss": 0.097, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 1.9790390635633592, |
|
"grad_norm": 0.1536770612001419, |
|
"learning_rate": 1.6784107338899124e-08, |
|
"loss": 0.0775, |
|
"step": 3635 |
|
}, |
|
{ |
|
"epoch": 1.9817612631005854, |
|
"grad_norm": 0.12998686730861664, |
|
"learning_rate": 1.2554719231083755e-08, |
|
"loss": 0.0696, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 1.9844834626378114, |
|
"grad_norm": 0.11487606167793274, |
|
"learning_rate": 8.93809670753365e-09, |
|
"loss": 0.0674, |
|
"step": 3645 |
|
}, |
|
{ |
|
"epoch": 1.9872056621750374, |
|
"grad_norm": 0.13673199713230133, |
|
"learning_rate": 5.934328458459537e-09, |
|
"loss": 0.0712, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.9899278617122635, |
|
"grad_norm": 0.1361059844493866, |
|
"learning_rate": 3.543488145082985e-09, |
|
"loss": 0.0614, |
|
"step": 3655 |
|
}, |
|
{ |
|
"epoch": 1.9926500612494897, |
|
"grad_norm": 0.15082420408725739, |
|
"learning_rate": 1.7656343978378342e-09, |
|
"loss": 0.0762, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 1.9953722607867157, |
|
"grad_norm": 0.1459612250328064, |
|
"learning_rate": 6.008108149185843e-10, |
|
"loss": 0.0841, |
|
"step": 3665 |
|
}, |
|
{ |
|
"epoch": 1.9980944603239417, |
|
"grad_norm": 0.15308088064193726, |
|
"learning_rate": 4.904596122290439e-11, |
|
"loss": 0.0805, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 1.9991833401388321, |
|
"step": 3672, |
|
"total_flos": 2.724210105233965e+18, |
|
"train_loss": 0.10618524583693684, |
|
"train_runtime": 81161.5668, |
|
"train_samples_per_second": 0.724, |
|
"train_steps_per_second": 0.045 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 3672, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.724210105233965e+18, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|