|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 10000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.001, |
|
"grad_norm": 1.7063226699829102, |
|
"learning_rate": 4.995e-05, |
|
"loss": 9.6305, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.002, |
|
"grad_norm": 1.467505693435669, |
|
"learning_rate": 4.99e-05, |
|
"loss": 8.8474, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.003, |
|
"grad_norm": 1.3338744640350342, |
|
"learning_rate": 4.9850000000000006e-05, |
|
"loss": 8.4272, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.004, |
|
"grad_norm": 1.194218635559082, |
|
"learning_rate": 4.9800000000000004e-05, |
|
"loss": 7.9969, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.005, |
|
"grad_norm": 0.9542586207389832, |
|
"learning_rate": 4.975e-05, |
|
"loss": 7.8018, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.006, |
|
"grad_norm": 0.8312947154045105, |
|
"learning_rate": 4.97e-05, |
|
"loss": 7.5303, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.007, |
|
"grad_norm": 0.6978892683982849, |
|
"learning_rate": 4.965e-05, |
|
"loss": 7.3733, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.008, |
|
"grad_norm": 0.6895764470100403, |
|
"learning_rate": 4.96e-05, |
|
"loss": 7.2434, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.009, |
|
"grad_norm": 0.5555976033210754, |
|
"learning_rate": 4.9550000000000005e-05, |
|
"loss": 7.0877, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.836391806602478, |
|
"learning_rate": 4.9500000000000004e-05, |
|
"loss": 7.0338, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.011, |
|
"grad_norm": 0.782464861869812, |
|
"learning_rate": 4.945e-05, |
|
"loss": 6.878, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.012, |
|
"grad_norm": 1.3705933094024658, |
|
"learning_rate": 4.94e-05, |
|
"loss": 6.5874, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.013, |
|
"grad_norm": 0.7560775876045227, |
|
"learning_rate": 4.935e-05, |
|
"loss": 6.4978, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.014, |
|
"grad_norm": 1.3238508701324463, |
|
"learning_rate": 4.93e-05, |
|
"loss": 6.3998, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.015, |
|
"grad_norm": 0.7834548950195312, |
|
"learning_rate": 4.9250000000000004e-05, |
|
"loss": 6.2838, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.016, |
|
"grad_norm": 0.762347400188446, |
|
"learning_rate": 4.92e-05, |
|
"loss": 6.0387, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.017, |
|
"grad_norm": 0.7799501419067383, |
|
"learning_rate": 4.915e-05, |
|
"loss": 6.0241, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.018, |
|
"grad_norm": 0.7948866486549377, |
|
"learning_rate": 4.91e-05, |
|
"loss": 5.8776, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.019, |
|
"grad_norm": 0.9890483021736145, |
|
"learning_rate": 4.905e-05, |
|
"loss": 5.747, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.9131263494491577, |
|
"learning_rate": 4.9e-05, |
|
"loss": 5.644, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.021, |
|
"grad_norm": 1.7073436975479126, |
|
"learning_rate": 4.8950000000000004e-05, |
|
"loss": 5.778, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.022, |
|
"grad_norm": 0.8059922456741333, |
|
"learning_rate": 4.89e-05, |
|
"loss": 5.4755, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.023, |
|
"grad_norm": 1.2500686645507812, |
|
"learning_rate": 4.885e-05, |
|
"loss": 5.3769, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.024, |
|
"grad_norm": 1.3848680257797241, |
|
"learning_rate": 4.88e-05, |
|
"loss": 5.2105, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.025, |
|
"grad_norm": 1.2381746768951416, |
|
"learning_rate": 4.875e-05, |
|
"loss": 5.1444, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.026, |
|
"grad_norm": 2.7005224227905273, |
|
"learning_rate": 4.87e-05, |
|
"loss": 5.1608, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.027, |
|
"grad_norm": 1.1472671031951904, |
|
"learning_rate": 4.8650000000000003e-05, |
|
"loss": 4.9456, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.028, |
|
"grad_norm": 1.9849270582199097, |
|
"learning_rate": 4.86e-05, |
|
"loss": 4.8466, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.029, |
|
"grad_norm": 1.857001781463623, |
|
"learning_rate": 4.855e-05, |
|
"loss": 4.7323, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.6731220483779907, |
|
"learning_rate": 4.85e-05, |
|
"loss": 4.5786, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.031, |
|
"grad_norm": 1.7968906164169312, |
|
"learning_rate": 4.845e-05, |
|
"loss": 4.4588, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.032, |
|
"grad_norm": 1.7908226251602173, |
|
"learning_rate": 4.8400000000000004e-05, |
|
"loss": 4.3645, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.033, |
|
"grad_norm": 2.538881540298462, |
|
"learning_rate": 4.835e-05, |
|
"loss": 4.1489, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.034, |
|
"grad_norm": 2.306257486343384, |
|
"learning_rate": 4.83e-05, |
|
"loss": 3.9798, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.035, |
|
"grad_norm": 2.1730940341949463, |
|
"learning_rate": 4.825e-05, |
|
"loss": 4.0231, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.036, |
|
"grad_norm": 2.4211463928222656, |
|
"learning_rate": 4.82e-05, |
|
"loss": 3.8495, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.037, |
|
"grad_norm": 2.3698794841766357, |
|
"learning_rate": 4.815e-05, |
|
"loss": 3.6977, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.038, |
|
"grad_norm": 2.147799491882324, |
|
"learning_rate": 4.8100000000000004e-05, |
|
"loss": 3.8008, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.039, |
|
"grad_norm": 2.3577606678009033, |
|
"learning_rate": 4.805e-05, |
|
"loss": 3.6983, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 2.065912961959839, |
|
"learning_rate": 4.8e-05, |
|
"loss": 3.5738, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.041, |
|
"grad_norm": 2.930288314819336, |
|
"learning_rate": 4.795e-05, |
|
"loss": 3.5117, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.042, |
|
"grad_norm": 2.3703155517578125, |
|
"learning_rate": 4.79e-05, |
|
"loss": 3.2483, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.043, |
|
"grad_norm": 2.6050736904144287, |
|
"learning_rate": 4.785e-05, |
|
"loss": 3.2342, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.044, |
|
"grad_norm": 2.0790674686431885, |
|
"learning_rate": 4.78e-05, |
|
"loss": 3.1452, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.045, |
|
"grad_norm": 2.2497427463531494, |
|
"learning_rate": 4.775e-05, |
|
"loss": 3.0316, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.046, |
|
"grad_norm": 2.507902145385742, |
|
"learning_rate": 4.77e-05, |
|
"loss": 2.8938, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.047, |
|
"grad_norm": 2.517744541168213, |
|
"learning_rate": 4.765e-05, |
|
"loss": 2.8137, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.048, |
|
"grad_norm": 3.9981460571289062, |
|
"learning_rate": 4.76e-05, |
|
"loss": 2.9864, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.049, |
|
"grad_norm": 2.265026569366455, |
|
"learning_rate": 4.755e-05, |
|
"loss": 2.7839, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 2.257293701171875, |
|
"learning_rate": 4.75e-05, |
|
"loss": 2.6834, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.051, |
|
"grad_norm": 2.6932270526885986, |
|
"learning_rate": 4.745e-05, |
|
"loss": 2.5755, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.052, |
|
"grad_norm": 1.7177081108093262, |
|
"learning_rate": 4.74e-05, |
|
"loss": 2.425, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.053, |
|
"grad_norm": 2.2452073097229004, |
|
"learning_rate": 4.735e-05, |
|
"loss": 2.5261, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.054, |
|
"grad_norm": 2.2109947204589844, |
|
"learning_rate": 4.73e-05, |
|
"loss": 2.3825, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.055, |
|
"grad_norm": 2.574531078338623, |
|
"learning_rate": 4.7249999999999997e-05, |
|
"loss": 2.3087, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.056, |
|
"grad_norm": 2.3631017208099365, |
|
"learning_rate": 4.72e-05, |
|
"loss": 2.3099, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.057, |
|
"grad_norm": 2.3809709548950195, |
|
"learning_rate": 4.715e-05, |
|
"loss": 2.3001, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.058, |
|
"grad_norm": 2.0683534145355225, |
|
"learning_rate": 4.71e-05, |
|
"loss": 2.0813, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.059, |
|
"grad_norm": 2.5471837520599365, |
|
"learning_rate": 4.705e-05, |
|
"loss": 2.0378, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 2.585564374923706, |
|
"learning_rate": 4.7e-05, |
|
"loss": 2.2062, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.061, |
|
"grad_norm": 2.062100648880005, |
|
"learning_rate": 4.695e-05, |
|
"loss": 1.9914, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.062, |
|
"grad_norm": 2.1019210815429688, |
|
"learning_rate": 4.69e-05, |
|
"loss": 1.9635, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.063, |
|
"grad_norm": 2.630436658859253, |
|
"learning_rate": 4.685000000000001e-05, |
|
"loss": 1.9123, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.064, |
|
"grad_norm": 2.1028494834899902, |
|
"learning_rate": 4.6800000000000006e-05, |
|
"loss": 1.7583, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.065, |
|
"grad_norm": 2.392193078994751, |
|
"learning_rate": 4.6750000000000005e-05, |
|
"loss": 1.7532, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.066, |
|
"grad_norm": 2.004413366317749, |
|
"learning_rate": 4.6700000000000003e-05, |
|
"loss": 1.6978, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.067, |
|
"grad_norm": 2.210513114929199, |
|
"learning_rate": 4.665e-05, |
|
"loss": 1.6311, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.068, |
|
"grad_norm": 1.8464936017990112, |
|
"learning_rate": 4.660000000000001e-05, |
|
"loss": 1.5507, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.069, |
|
"grad_norm": 2.0246541500091553, |
|
"learning_rate": 4.655000000000001e-05, |
|
"loss": 1.5637, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 2.199751138687134, |
|
"learning_rate": 4.6500000000000005e-05, |
|
"loss": 1.5603, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.071, |
|
"grad_norm": 2.2002196311950684, |
|
"learning_rate": 4.6450000000000004e-05, |
|
"loss": 1.4558, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.072, |
|
"grad_norm": 1.7826759815216064, |
|
"learning_rate": 4.64e-05, |
|
"loss": 1.4309, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.073, |
|
"grad_norm": 1.760297417640686, |
|
"learning_rate": 4.635e-05, |
|
"loss": 1.3531, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.074, |
|
"grad_norm": 2.0505475997924805, |
|
"learning_rate": 4.630000000000001e-05, |
|
"loss": 1.3641, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.075, |
|
"grad_norm": 2.1375396251678467, |
|
"learning_rate": 4.6250000000000006e-05, |
|
"loss": 1.3259, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.076, |
|
"grad_norm": 1.8252328634262085, |
|
"learning_rate": 4.6200000000000005e-05, |
|
"loss": 1.2026, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.077, |
|
"grad_norm": 1.8945906162261963, |
|
"learning_rate": 4.6150000000000004e-05, |
|
"loss": 1.2878, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.078, |
|
"grad_norm": 1.7990881204605103, |
|
"learning_rate": 4.61e-05, |
|
"loss": 1.1853, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.079, |
|
"grad_norm": 1.4897470474243164, |
|
"learning_rate": 4.605e-05, |
|
"loss": 1.1279, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 2.2804617881774902, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 1.0804, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.081, |
|
"grad_norm": 1.4800664186477661, |
|
"learning_rate": 4.5950000000000006e-05, |
|
"loss": 1.0361, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.082, |
|
"grad_norm": 1.3526049852371216, |
|
"learning_rate": 4.5900000000000004e-05, |
|
"loss": 1.0585, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.083, |
|
"grad_norm": 1.534173607826233, |
|
"learning_rate": 4.585e-05, |
|
"loss": 1.0206, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.084, |
|
"grad_norm": 1.4844435453414917, |
|
"learning_rate": 4.58e-05, |
|
"loss": 0.9758, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.085, |
|
"grad_norm": 1.533679485321045, |
|
"learning_rate": 4.575e-05, |
|
"loss": 0.9168, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.086, |
|
"grad_norm": 1.456162691116333, |
|
"learning_rate": 4.5700000000000006e-05, |
|
"loss": 0.8913, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.087, |
|
"grad_norm": 1.7335631847381592, |
|
"learning_rate": 4.5650000000000005e-05, |
|
"loss": 0.9154, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.088, |
|
"grad_norm": 1.3331761360168457, |
|
"learning_rate": 4.5600000000000004e-05, |
|
"loss": 0.8483, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.089, |
|
"grad_norm": 1.6703053712844849, |
|
"learning_rate": 4.555e-05, |
|
"loss": 0.8116, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 1.275975227355957, |
|
"learning_rate": 4.55e-05, |
|
"loss": 0.7869, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.091, |
|
"grad_norm": 1.3800309896469116, |
|
"learning_rate": 4.545000000000001e-05, |
|
"loss": 0.7637, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.092, |
|
"grad_norm": 1.9472386837005615, |
|
"learning_rate": 4.5400000000000006e-05, |
|
"loss": 0.7212, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.093, |
|
"grad_norm": 1.3451333045959473, |
|
"learning_rate": 4.5350000000000005e-05, |
|
"loss": 0.6829, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.094, |
|
"grad_norm": 1.5209784507751465, |
|
"learning_rate": 4.53e-05, |
|
"loss": 0.729, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.095, |
|
"grad_norm": 1.3944469690322876, |
|
"learning_rate": 4.525e-05, |
|
"loss": 0.6732, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.096, |
|
"grad_norm": 1.2177132368087769, |
|
"learning_rate": 4.52e-05, |
|
"loss": 0.6188, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.097, |
|
"grad_norm": 1.5988528728485107, |
|
"learning_rate": 4.5150000000000006e-05, |
|
"loss": 0.6622, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.098, |
|
"grad_norm": 1.3636531829833984, |
|
"learning_rate": 4.5100000000000005e-05, |
|
"loss": 0.5792, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.099, |
|
"grad_norm": 1.377453088760376, |
|
"learning_rate": 4.5050000000000004e-05, |
|
"loss": 0.6062, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 2.295713186264038, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.5709, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.101, |
|
"grad_norm": 1.35196852684021, |
|
"learning_rate": 4.495e-05, |
|
"loss": 0.5521, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.102, |
|
"grad_norm": 1.0617187023162842, |
|
"learning_rate": 4.49e-05, |
|
"loss": 0.5147, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.103, |
|
"grad_norm": 1.3035167455673218, |
|
"learning_rate": 4.4850000000000006e-05, |
|
"loss": 0.5081, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.104, |
|
"grad_norm": 1.2835568189620972, |
|
"learning_rate": 4.4800000000000005e-05, |
|
"loss": 0.5, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.105, |
|
"grad_norm": 1.0403038263320923, |
|
"learning_rate": 4.4750000000000004e-05, |
|
"loss": 0.4825, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.106, |
|
"grad_norm": 0.9538235068321228, |
|
"learning_rate": 4.47e-05, |
|
"loss": 0.4316, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.107, |
|
"grad_norm": 1.4246289730072021, |
|
"learning_rate": 4.465e-05, |
|
"loss": 0.4304, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.108, |
|
"grad_norm": 1.1217833757400513, |
|
"learning_rate": 4.46e-05, |
|
"loss": 0.4397, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.109, |
|
"grad_norm": 1.0411335229873657, |
|
"learning_rate": 4.4550000000000005e-05, |
|
"loss": 0.4057, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.8498069643974304, |
|
"learning_rate": 4.4500000000000004e-05, |
|
"loss": 0.3933, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.111, |
|
"grad_norm": 1.1270406246185303, |
|
"learning_rate": 4.445e-05, |
|
"loss": 0.366, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.112, |
|
"grad_norm": 1.189041256904602, |
|
"learning_rate": 4.44e-05, |
|
"loss": 0.3407, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.113, |
|
"grad_norm": 0.9837467670440674, |
|
"learning_rate": 4.435e-05, |
|
"loss": 0.3511, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.114, |
|
"grad_norm": 1.0432955026626587, |
|
"learning_rate": 4.43e-05, |
|
"loss": 0.3381, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.115, |
|
"grad_norm": 0.9529951810836792, |
|
"learning_rate": 4.4250000000000005e-05, |
|
"loss": 0.3189, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.116, |
|
"grad_norm": 1.008836030960083, |
|
"learning_rate": 4.4200000000000004e-05, |
|
"loss": 0.3077, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.117, |
|
"grad_norm": 1.0005086660385132, |
|
"learning_rate": 4.415e-05, |
|
"loss": 0.3001, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.118, |
|
"grad_norm": 1.1065175533294678, |
|
"learning_rate": 4.41e-05, |
|
"loss": 0.28, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.119, |
|
"grad_norm": 0.6701949834823608, |
|
"learning_rate": 4.405e-05, |
|
"loss": 0.2692, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.7154658436775208, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 0.2663, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.121, |
|
"grad_norm": 0.6997113823890686, |
|
"learning_rate": 4.3950000000000004e-05, |
|
"loss": 0.2595, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.122, |
|
"grad_norm": 0.9047608971595764, |
|
"learning_rate": 4.39e-05, |
|
"loss": 0.2558, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.123, |
|
"grad_norm": 0.8508415222167969, |
|
"learning_rate": 4.385e-05, |
|
"loss": 0.2459, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.124, |
|
"grad_norm": 0.6505220532417297, |
|
"learning_rate": 4.38e-05, |
|
"loss": 0.2236, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.125, |
|
"grad_norm": 0.5360460877418518, |
|
"learning_rate": 4.375e-05, |
|
"loss": 0.2189, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.126, |
|
"grad_norm": 0.560817539691925, |
|
"learning_rate": 4.3700000000000005e-05, |
|
"loss": 0.2166, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.127, |
|
"grad_norm": 0.7089666128158569, |
|
"learning_rate": 4.3650000000000004e-05, |
|
"loss": 0.2026, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.128, |
|
"grad_norm": 0.5265817046165466, |
|
"learning_rate": 4.36e-05, |
|
"loss": 0.197, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.129, |
|
"grad_norm": 0.6629377007484436, |
|
"learning_rate": 4.355e-05, |
|
"loss": 0.1934, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 1.0730735063552856, |
|
"learning_rate": 4.35e-05, |
|
"loss": 0.1807, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.131, |
|
"grad_norm": 0.6990699172019958, |
|
"learning_rate": 4.345e-05, |
|
"loss": 0.1845, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.132, |
|
"grad_norm": 0.5047340393066406, |
|
"learning_rate": 4.3400000000000005e-05, |
|
"loss": 0.1725, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.133, |
|
"grad_norm": 0.6830994486808777, |
|
"learning_rate": 4.335e-05, |
|
"loss": 0.1687, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.134, |
|
"grad_norm": 0.5861710906028748, |
|
"learning_rate": 4.33e-05, |
|
"loss": 0.1671, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.135, |
|
"grad_norm": 0.43594300746917725, |
|
"learning_rate": 4.325e-05, |
|
"loss": 0.1467, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.136, |
|
"grad_norm": 0.44587692618370056, |
|
"learning_rate": 4.32e-05, |
|
"loss": 0.1509, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.137, |
|
"grad_norm": 0.5523977875709534, |
|
"learning_rate": 4.315e-05, |
|
"loss": 0.1434, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.138, |
|
"grad_norm": 0.6139170527458191, |
|
"learning_rate": 4.3100000000000004e-05, |
|
"loss": 0.1433, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.139, |
|
"grad_norm": 0.6169497966766357, |
|
"learning_rate": 4.305e-05, |
|
"loss": 0.1365, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.49120134115219116, |
|
"learning_rate": 4.3e-05, |
|
"loss": 0.1287, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.141, |
|
"grad_norm": 0.451753169298172, |
|
"learning_rate": 4.295e-05, |
|
"loss": 0.1142, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.142, |
|
"grad_norm": 0.5429627895355225, |
|
"learning_rate": 4.29e-05, |
|
"loss": 0.134, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.143, |
|
"grad_norm": 0.7613041400909424, |
|
"learning_rate": 4.285e-05, |
|
"loss": 0.1391, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.144, |
|
"grad_norm": 0.4953358471393585, |
|
"learning_rate": 4.2800000000000004e-05, |
|
"loss": 0.1197, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.145, |
|
"grad_norm": 0.3657626509666443, |
|
"learning_rate": 4.275e-05, |
|
"loss": 0.1071, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.146, |
|
"grad_norm": 0.44240206480026245, |
|
"learning_rate": 4.27e-05, |
|
"loss": 0.1111, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.147, |
|
"grad_norm": 0.5007165670394897, |
|
"learning_rate": 4.265e-05, |
|
"loss": 0.1056, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.148, |
|
"grad_norm": 0.4580256938934326, |
|
"learning_rate": 4.26e-05, |
|
"loss": 0.1049, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.149, |
|
"grad_norm": 0.4970822036266327, |
|
"learning_rate": 4.2550000000000004e-05, |
|
"loss": 0.1032, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.4138182997703552, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.0961, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.151, |
|
"grad_norm": 0.4013712406158447, |
|
"learning_rate": 4.245e-05, |
|
"loss": 0.0949, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.152, |
|
"grad_norm": 0.3868940770626068, |
|
"learning_rate": 4.24e-05, |
|
"loss": 0.0837, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.153, |
|
"grad_norm": 0.3113015294075012, |
|
"learning_rate": 4.235e-05, |
|
"loss": 0.0909, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.154, |
|
"grad_norm": 0.3569623529911041, |
|
"learning_rate": 4.23e-05, |
|
"loss": 0.0908, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.155, |
|
"grad_norm": 0.3841746151447296, |
|
"learning_rate": 4.2250000000000004e-05, |
|
"loss": 0.0806, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.156, |
|
"grad_norm": 0.6565550565719604, |
|
"learning_rate": 4.22e-05, |
|
"loss": 0.075, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.157, |
|
"grad_norm": 0.4816874563694, |
|
"learning_rate": 4.215e-05, |
|
"loss": 0.0858, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.158, |
|
"grad_norm": 0.30408933758735657, |
|
"learning_rate": 4.21e-05, |
|
"loss": 0.0704, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.159, |
|
"grad_norm": 0.43388792872428894, |
|
"learning_rate": 4.205e-05, |
|
"loss": 0.0671, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.33304253220558167, |
|
"learning_rate": 4.2e-05, |
|
"loss": 0.07, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.161, |
|
"grad_norm": 0.4260387420654297, |
|
"learning_rate": 4.195e-05, |
|
"loss": 0.0691, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.162, |
|
"grad_norm": 0.37930798530578613, |
|
"learning_rate": 4.19e-05, |
|
"loss": 0.0715, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.163, |
|
"grad_norm": 0.3198983669281006, |
|
"learning_rate": 4.185e-05, |
|
"loss": 0.0651, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.164, |
|
"grad_norm": 0.3510359823703766, |
|
"learning_rate": 4.18e-05, |
|
"loss": 0.058, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.165, |
|
"grad_norm": 0.41047966480255127, |
|
"learning_rate": 4.175e-05, |
|
"loss": 0.065, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.166, |
|
"grad_norm": 0.3054174482822418, |
|
"learning_rate": 4.17e-05, |
|
"loss": 0.0564, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.167, |
|
"grad_norm": 0.29319772124290466, |
|
"learning_rate": 4.165e-05, |
|
"loss": 0.0599, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.168, |
|
"grad_norm": 0.257354736328125, |
|
"learning_rate": 4.16e-05, |
|
"loss": 0.0536, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.169, |
|
"grad_norm": 0.25215694308280945, |
|
"learning_rate": 4.155e-05, |
|
"loss": 0.0587, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.4573931097984314, |
|
"learning_rate": 4.15e-05, |
|
"loss": 0.0524, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.171, |
|
"grad_norm": 0.3514876663684845, |
|
"learning_rate": 4.145e-05, |
|
"loss": 0.0551, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.172, |
|
"grad_norm": 0.3239930272102356, |
|
"learning_rate": 4.14e-05, |
|
"loss": 0.0499, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.173, |
|
"grad_norm": 0.20213039219379425, |
|
"learning_rate": 4.135e-05, |
|
"loss": 0.0521, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.174, |
|
"grad_norm": 0.21831783652305603, |
|
"learning_rate": 4.13e-05, |
|
"loss": 0.0469, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.175, |
|
"grad_norm": 0.2585163712501526, |
|
"learning_rate": 4.125e-05, |
|
"loss": 0.0469, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.176, |
|
"grad_norm": 0.21717113256454468, |
|
"learning_rate": 4.12e-05, |
|
"loss": 0.0455, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.177, |
|
"grad_norm": 0.27248838543891907, |
|
"learning_rate": 4.115e-05, |
|
"loss": 0.046, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.178, |
|
"grad_norm": 0.2503461241722107, |
|
"learning_rate": 4.11e-05, |
|
"loss": 0.0447, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.179, |
|
"grad_norm": 0.27404382824897766, |
|
"learning_rate": 4.105e-05, |
|
"loss": 0.0437, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.23549066483974457, |
|
"learning_rate": 4.1e-05, |
|
"loss": 0.0423, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.181, |
|
"grad_norm": 0.19369937479496002, |
|
"learning_rate": 4.095e-05, |
|
"loss": 0.0408, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.182, |
|
"grad_norm": 0.20560242235660553, |
|
"learning_rate": 4.09e-05, |
|
"loss": 0.0379, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.183, |
|
"grad_norm": 0.34989863634109497, |
|
"learning_rate": 4.085e-05, |
|
"loss": 0.0364, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.184, |
|
"grad_norm": 0.2310326248407364, |
|
"learning_rate": 4.08e-05, |
|
"loss": 0.0385, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.185, |
|
"grad_norm": 0.21055462956428528, |
|
"learning_rate": 4.075e-05, |
|
"loss": 0.0351, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.186, |
|
"grad_norm": 0.3251895308494568, |
|
"learning_rate": 4.07e-05, |
|
"loss": 0.0381, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.187, |
|
"grad_norm": 0.2887445390224457, |
|
"learning_rate": 4.065e-05, |
|
"loss": 0.0341, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.188, |
|
"grad_norm": 0.15948843955993652, |
|
"learning_rate": 4.0600000000000004e-05, |
|
"loss": 0.0313, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.189, |
|
"grad_norm": 0.2413359135389328, |
|
"learning_rate": 4.055e-05, |
|
"loss": 0.0338, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.2132706493139267, |
|
"learning_rate": 4.05e-05, |
|
"loss": 0.0339, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.191, |
|
"grad_norm": 0.17968431115150452, |
|
"learning_rate": 4.045000000000001e-05, |
|
"loss": 0.0317, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.192, |
|
"grad_norm": 0.15828929841518402, |
|
"learning_rate": 4.0400000000000006e-05, |
|
"loss": 0.0302, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.193, |
|
"grad_norm": 0.18106874823570251, |
|
"learning_rate": 4.0350000000000005e-05, |
|
"loss": 0.0331, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.194, |
|
"grad_norm": 0.34827324748039246, |
|
"learning_rate": 4.0300000000000004e-05, |
|
"loss": 0.032, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.195, |
|
"grad_norm": 0.21621111035346985, |
|
"learning_rate": 4.025e-05, |
|
"loss": 0.0317, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.196, |
|
"grad_norm": 0.2159423679113388, |
|
"learning_rate": 4.02e-05, |
|
"loss": 0.0296, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.197, |
|
"grad_norm": 0.17750391364097595, |
|
"learning_rate": 4.015000000000001e-05, |
|
"loss": 0.0297, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.198, |
|
"grad_norm": 0.13952311873435974, |
|
"learning_rate": 4.0100000000000006e-05, |
|
"loss": 0.0279, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.199, |
|
"grad_norm": 0.19622887670993805, |
|
"learning_rate": 4.0050000000000004e-05, |
|
"loss": 0.0278, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.14959514141082764, |
|
"learning_rate": 4e-05, |
|
"loss": 0.0251, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.201, |
|
"grad_norm": 0.17456738650798798, |
|
"learning_rate": 3.995e-05, |
|
"loss": 0.0315, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.202, |
|
"grad_norm": 0.15893588960170746, |
|
"learning_rate": 3.99e-05, |
|
"loss": 0.0243, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.203, |
|
"grad_norm": 0.14638105034828186, |
|
"learning_rate": 3.9850000000000006e-05, |
|
"loss": 0.0247, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.204, |
|
"grad_norm": 0.1714017242193222, |
|
"learning_rate": 3.9800000000000005e-05, |
|
"loss": 0.0252, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.205, |
|
"grad_norm": 0.18679572641849518, |
|
"learning_rate": 3.9750000000000004e-05, |
|
"loss": 0.0234, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.206, |
|
"grad_norm": 0.10623681545257568, |
|
"learning_rate": 3.97e-05, |
|
"loss": 0.0256, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.207, |
|
"grad_norm": 0.18566076457500458, |
|
"learning_rate": 3.965e-05, |
|
"loss": 0.0238, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.208, |
|
"grad_norm": 0.12487553805112839, |
|
"learning_rate": 3.960000000000001e-05, |
|
"loss": 0.0226, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.209, |
|
"grad_norm": 0.13191473484039307, |
|
"learning_rate": 3.9550000000000006e-05, |
|
"loss": 0.0232, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.225613072514534, |
|
"learning_rate": 3.9500000000000005e-05, |
|
"loss": 0.0226, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.211, |
|
"grad_norm": 0.10896781831979752, |
|
"learning_rate": 3.9450000000000003e-05, |
|
"loss": 0.0206, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.212, |
|
"grad_norm": 0.16153796017169952, |
|
"learning_rate": 3.94e-05, |
|
"loss": 0.0195, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.213, |
|
"grad_norm": 0.19171251356601715, |
|
"learning_rate": 3.935e-05, |
|
"loss": 0.0203, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.214, |
|
"grad_norm": 0.13199982047080994, |
|
"learning_rate": 3.9300000000000007e-05, |
|
"loss": 0.0194, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.215, |
|
"grad_norm": 0.12839478254318237, |
|
"learning_rate": 3.9250000000000005e-05, |
|
"loss": 0.0227, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.216, |
|
"grad_norm": 0.10787441581487656, |
|
"learning_rate": 3.9200000000000004e-05, |
|
"loss": 0.0195, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.217, |
|
"grad_norm": 0.1551046371459961, |
|
"learning_rate": 3.915e-05, |
|
"loss": 0.019, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.218, |
|
"grad_norm": 0.18844197690486908, |
|
"learning_rate": 3.91e-05, |
|
"loss": 0.02, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.219, |
|
"grad_norm": 0.21247665584087372, |
|
"learning_rate": 3.905e-05, |
|
"loss": 0.0206, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.11881183087825775, |
|
"learning_rate": 3.9000000000000006e-05, |
|
"loss": 0.0176, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.221, |
|
"grad_norm": 0.16291823983192444, |
|
"learning_rate": 3.8950000000000005e-05, |
|
"loss": 0.0178, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.222, |
|
"grad_norm": 0.14063787460327148, |
|
"learning_rate": 3.8900000000000004e-05, |
|
"loss": 0.0177, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.223, |
|
"grad_norm": 0.15583930909633636, |
|
"learning_rate": 3.885e-05, |
|
"loss": 0.0185, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.224, |
|
"grad_norm": 0.16128882765769958, |
|
"learning_rate": 3.88e-05, |
|
"loss": 0.0168, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.225, |
|
"grad_norm": 0.1588473916053772, |
|
"learning_rate": 3.875e-05, |
|
"loss": 0.0167, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.226, |
|
"grad_norm": 0.10487533360719681, |
|
"learning_rate": 3.8700000000000006e-05, |
|
"loss": 0.0154, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.227, |
|
"grad_norm": 0.2638506591320038, |
|
"learning_rate": 3.8650000000000004e-05, |
|
"loss": 0.0179, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.228, |
|
"grad_norm": 0.12504911422729492, |
|
"learning_rate": 3.86e-05, |
|
"loss": 0.016, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.229, |
|
"grad_norm": 0.11655262857675552, |
|
"learning_rate": 3.855e-05, |
|
"loss": 0.0164, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.10052930563688278, |
|
"learning_rate": 3.85e-05, |
|
"loss": 0.0143, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.231, |
|
"grad_norm": 0.07682032138109207, |
|
"learning_rate": 3.845e-05, |
|
"loss": 0.0165, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.232, |
|
"grad_norm": 0.12146533280611038, |
|
"learning_rate": 3.8400000000000005e-05, |
|
"loss": 0.0147, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.233, |
|
"grad_norm": 0.16349685192108154, |
|
"learning_rate": 3.8350000000000004e-05, |
|
"loss": 0.0166, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.234, |
|
"grad_norm": 0.10822432488203049, |
|
"learning_rate": 3.83e-05, |
|
"loss": 0.0146, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.235, |
|
"grad_norm": 0.0805143415927887, |
|
"learning_rate": 3.825e-05, |
|
"loss": 0.0131, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.236, |
|
"grad_norm": 0.08285068720579147, |
|
"learning_rate": 3.82e-05, |
|
"loss": 0.0134, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.237, |
|
"grad_norm": 0.0886882022023201, |
|
"learning_rate": 3.8150000000000006e-05, |
|
"loss": 0.0133, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.238, |
|
"grad_norm": 0.08793161064386368, |
|
"learning_rate": 3.8100000000000005e-05, |
|
"loss": 0.0142, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.239, |
|
"grad_norm": 0.08325997740030289, |
|
"learning_rate": 3.805e-05, |
|
"loss": 0.012, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.10990972816944122, |
|
"learning_rate": 3.8e-05, |
|
"loss": 0.0134, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.241, |
|
"grad_norm": 0.06695697456598282, |
|
"learning_rate": 3.795e-05, |
|
"loss": 0.0119, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.242, |
|
"grad_norm": 0.08304648846387863, |
|
"learning_rate": 3.79e-05, |
|
"loss": 0.0128, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.243, |
|
"grad_norm": 0.09513472020626068, |
|
"learning_rate": 3.7850000000000005e-05, |
|
"loss": 0.0146, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.244, |
|
"grad_norm": 0.07892587035894394, |
|
"learning_rate": 3.7800000000000004e-05, |
|
"loss": 0.0116, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.245, |
|
"grad_norm": 0.12630197405815125, |
|
"learning_rate": 3.775e-05, |
|
"loss": 0.0132, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.246, |
|
"grad_norm": 0.08250122517347336, |
|
"learning_rate": 3.77e-05, |
|
"loss": 0.013, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.247, |
|
"grad_norm": 0.09903154522180557, |
|
"learning_rate": 3.765e-05, |
|
"loss": 0.0117, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.248, |
|
"grad_norm": 0.09059987217187881, |
|
"learning_rate": 3.76e-05, |
|
"loss": 0.0111, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.249, |
|
"grad_norm": 0.09777077287435532, |
|
"learning_rate": 3.7550000000000005e-05, |
|
"loss": 0.0142, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.1801980435848236, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.0121, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.251, |
|
"grad_norm": 0.08936703950166702, |
|
"learning_rate": 3.745e-05, |
|
"loss": 0.0112, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.252, |
|
"grad_norm": 0.09601296484470367, |
|
"learning_rate": 3.74e-05, |
|
"loss": 0.0116, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.253, |
|
"grad_norm": 0.08924739062786102, |
|
"learning_rate": 3.735e-05, |
|
"loss": 0.0119, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.254, |
|
"grad_norm": 0.07558383047580719, |
|
"learning_rate": 3.73e-05, |
|
"loss": 0.0108, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.255, |
|
"grad_norm": 0.05701779946684837, |
|
"learning_rate": 3.7250000000000004e-05, |
|
"loss": 0.011, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.256, |
|
"grad_norm": 0.0955379456281662, |
|
"learning_rate": 3.72e-05, |
|
"loss": 0.0112, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.257, |
|
"grad_norm": 0.07837249338626862, |
|
"learning_rate": 3.715e-05, |
|
"loss": 0.0111, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.258, |
|
"grad_norm": 0.09438953548669815, |
|
"learning_rate": 3.71e-05, |
|
"loss": 0.0121, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.259, |
|
"grad_norm": 0.08802532404661179, |
|
"learning_rate": 3.705e-05, |
|
"loss": 0.0098, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.0785641148686409, |
|
"learning_rate": 3.7e-05, |
|
"loss": 0.0106, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.261, |
|
"grad_norm": 0.10036404430866241, |
|
"learning_rate": 3.6950000000000004e-05, |
|
"loss": 0.0107, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.262, |
|
"grad_norm": 0.0663432776927948, |
|
"learning_rate": 3.69e-05, |
|
"loss": 0.0098, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.263, |
|
"grad_norm": 0.06886564195156097, |
|
"learning_rate": 3.685e-05, |
|
"loss": 0.0094, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.264, |
|
"grad_norm": 0.06641737371683121, |
|
"learning_rate": 3.68e-05, |
|
"loss": 0.0112, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.265, |
|
"grad_norm": 0.1470363438129425, |
|
"learning_rate": 3.675e-05, |
|
"loss": 0.0118, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.266, |
|
"grad_norm": 0.08694775402545929, |
|
"learning_rate": 3.6700000000000004e-05, |
|
"loss": 0.0105, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.267, |
|
"grad_norm": 0.08168693631887436, |
|
"learning_rate": 3.665e-05, |
|
"loss": 0.0101, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.268, |
|
"grad_norm": 0.06114206463098526, |
|
"learning_rate": 3.66e-05, |
|
"loss": 0.0097, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.269, |
|
"grad_norm": 0.09011895209550858, |
|
"learning_rate": 3.655e-05, |
|
"loss": 0.0101, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.06499819457530975, |
|
"learning_rate": 3.65e-05, |
|
"loss": 0.0089, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.271, |
|
"grad_norm": 0.08157055824995041, |
|
"learning_rate": 3.645e-05, |
|
"loss": 0.0099, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.272, |
|
"grad_norm": 0.06255003809928894, |
|
"learning_rate": 3.6400000000000004e-05, |
|
"loss": 0.0091, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.273, |
|
"grad_norm": 0.13641099631786346, |
|
"learning_rate": 3.635e-05, |
|
"loss": 0.01, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.274, |
|
"grad_norm": 0.06449054926633835, |
|
"learning_rate": 3.63e-05, |
|
"loss": 0.0087, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.275, |
|
"grad_norm": 0.09242594987154007, |
|
"learning_rate": 3.625e-05, |
|
"loss": 0.0084, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.276, |
|
"grad_norm": 0.14216932654380798, |
|
"learning_rate": 3.62e-05, |
|
"loss": 0.01, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.277, |
|
"grad_norm": 0.11992328613996506, |
|
"learning_rate": 3.615e-05, |
|
"loss": 0.0107, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.278, |
|
"grad_norm": 0.10537979751825333, |
|
"learning_rate": 3.61e-05, |
|
"loss": 0.0101, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.279, |
|
"grad_norm": 0.06420467048883438, |
|
"learning_rate": 3.605e-05, |
|
"loss": 0.0088, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.10813489556312561, |
|
"learning_rate": 3.6e-05, |
|
"loss": 0.0093, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.281, |
|
"grad_norm": 0.05735234543681145, |
|
"learning_rate": 3.595e-05, |
|
"loss": 0.0087, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.282, |
|
"grad_norm": 0.05712522938847542, |
|
"learning_rate": 3.59e-05, |
|
"loss": 0.0082, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.283, |
|
"grad_norm": 0.07710873335599899, |
|
"learning_rate": 3.585e-05, |
|
"loss": 0.0088, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.284, |
|
"grad_norm": 0.11007268726825714, |
|
"learning_rate": 3.58e-05, |
|
"loss": 0.0075, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.285, |
|
"grad_norm": 0.07825978100299835, |
|
"learning_rate": 3.575e-05, |
|
"loss": 0.0089, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.286, |
|
"grad_norm": 0.06950812041759491, |
|
"learning_rate": 3.57e-05, |
|
"loss": 0.0077, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.287, |
|
"grad_norm": 0.052572544664144516, |
|
"learning_rate": 3.565e-05, |
|
"loss": 0.0076, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.288, |
|
"grad_norm": 0.06588669121265411, |
|
"learning_rate": 3.56e-05, |
|
"loss": 0.0073, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.289, |
|
"grad_norm": 0.052969031035900116, |
|
"learning_rate": 3.555e-05, |
|
"loss": 0.0066, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.04204658418893814, |
|
"learning_rate": 3.55e-05, |
|
"loss": 0.0064, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.291, |
|
"grad_norm": 0.04765693470835686, |
|
"learning_rate": 3.545e-05, |
|
"loss": 0.0067, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.292, |
|
"grad_norm": 0.06796044856309891, |
|
"learning_rate": 3.54e-05, |
|
"loss": 0.0069, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.293, |
|
"grad_norm": 0.12173280119895935, |
|
"learning_rate": 3.535e-05, |
|
"loss": 0.0126, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.294, |
|
"grad_norm": 0.09393921494483948, |
|
"learning_rate": 3.53e-05, |
|
"loss": 0.0096, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.295, |
|
"grad_norm": 0.08246493339538574, |
|
"learning_rate": 3.525e-05, |
|
"loss": 0.0084, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.296, |
|
"grad_norm": 0.04482726752758026, |
|
"learning_rate": 3.52e-05, |
|
"loss": 0.0073, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.297, |
|
"grad_norm": 0.1147686317563057, |
|
"learning_rate": 3.515e-05, |
|
"loss": 0.0094, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.298, |
|
"grad_norm": 0.09143181890249252, |
|
"learning_rate": 3.51e-05, |
|
"loss": 0.0075, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.299, |
|
"grad_norm": 0.05911434814333916, |
|
"learning_rate": 3.505e-05, |
|
"loss": 0.0075, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.04372965916991234, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.0072, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.301, |
|
"grad_norm": 0.05518479272723198, |
|
"learning_rate": 3.495e-05, |
|
"loss": 0.0068, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.302, |
|
"grad_norm": 0.04555105045437813, |
|
"learning_rate": 3.49e-05, |
|
"loss": 0.0064, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.303, |
|
"grad_norm": 0.03831150382757187, |
|
"learning_rate": 3.485e-05, |
|
"loss": 0.007, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.304, |
|
"grad_norm": 0.04596749320626259, |
|
"learning_rate": 3.48e-05, |
|
"loss": 0.0065, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.305, |
|
"grad_norm": 0.07694078236818314, |
|
"learning_rate": 3.475e-05, |
|
"loss": 0.0059, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.306, |
|
"grad_norm": 0.12307348102331161, |
|
"learning_rate": 3.4699999999999996e-05, |
|
"loss": 0.0099, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.307, |
|
"grad_norm": 0.059611763805150986, |
|
"learning_rate": 3.465e-05, |
|
"loss": 0.0067, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.308, |
|
"grad_norm": 0.07357806712388992, |
|
"learning_rate": 3.46e-05, |
|
"loss": 0.007, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.309, |
|
"grad_norm": 0.060446444898843765, |
|
"learning_rate": 3.455e-05, |
|
"loss": 0.0063, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.05178246274590492, |
|
"learning_rate": 3.45e-05, |
|
"loss": 0.0064, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.311, |
|
"grad_norm": 0.4560135006904602, |
|
"learning_rate": 3.445e-05, |
|
"loss": 0.0076, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.312, |
|
"grad_norm": 0.10910088568925858, |
|
"learning_rate": 3.4399999999999996e-05, |
|
"loss": 0.0082, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.313, |
|
"grad_norm": 0.05087321624159813, |
|
"learning_rate": 3.435e-05, |
|
"loss": 0.0061, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.314, |
|
"grad_norm": 0.055152568966150284, |
|
"learning_rate": 3.430000000000001e-05, |
|
"loss": 0.0065, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.315, |
|
"grad_norm": 0.48375555872917175, |
|
"learning_rate": 3.4250000000000006e-05, |
|
"loss": 0.0081, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.316, |
|
"grad_norm": 0.12223263829946518, |
|
"learning_rate": 3.4200000000000005e-05, |
|
"loss": 0.0098, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.317, |
|
"grad_norm": 0.15452681481838226, |
|
"learning_rate": 3.415e-05, |
|
"loss": 0.0087, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.318, |
|
"grad_norm": 0.06133843585848808, |
|
"learning_rate": 3.41e-05, |
|
"loss": 0.0087, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.319, |
|
"grad_norm": 0.04037950187921524, |
|
"learning_rate": 3.405e-05, |
|
"loss": 0.0062, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.05707933381199837, |
|
"learning_rate": 3.4000000000000007e-05, |
|
"loss": 0.0061, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.321, |
|
"grad_norm": 0.04124099016189575, |
|
"learning_rate": 3.3950000000000005e-05, |
|
"loss": 0.0058, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.322, |
|
"grad_norm": 0.12988638877868652, |
|
"learning_rate": 3.3900000000000004e-05, |
|
"loss": 0.0062, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.323, |
|
"grad_norm": 0.04961306229233742, |
|
"learning_rate": 3.385e-05, |
|
"loss": 0.0057, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.324, |
|
"grad_norm": 0.05354069173336029, |
|
"learning_rate": 3.38e-05, |
|
"loss": 0.0062, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.325, |
|
"grad_norm": 0.04944461211562157, |
|
"learning_rate": 3.375000000000001e-05, |
|
"loss": 0.0061, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.326, |
|
"grad_norm": 0.03429180383682251, |
|
"learning_rate": 3.3700000000000006e-05, |
|
"loss": 0.0055, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.327, |
|
"grad_norm": 0.05271946266293526, |
|
"learning_rate": 3.3650000000000005e-05, |
|
"loss": 0.0054, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.328, |
|
"grad_norm": 0.03602539002895355, |
|
"learning_rate": 3.3600000000000004e-05, |
|
"loss": 0.0049, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.329, |
|
"grad_norm": 0.03325178474187851, |
|
"learning_rate": 3.355e-05, |
|
"loss": 0.0052, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.03728079795837402, |
|
"learning_rate": 3.35e-05, |
|
"loss": 0.0057, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.331, |
|
"grad_norm": 0.053768668323755264, |
|
"learning_rate": 3.345000000000001e-05, |
|
"loss": 0.0055, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.332, |
|
"grad_norm": 0.054501548409461975, |
|
"learning_rate": 3.3400000000000005e-05, |
|
"loss": 0.0053, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.333, |
|
"grad_norm": 0.05519956722855568, |
|
"learning_rate": 3.3350000000000004e-05, |
|
"loss": 0.0053, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.334, |
|
"grad_norm": 0.05373954027891159, |
|
"learning_rate": 3.33e-05, |
|
"loss": 0.006, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.335, |
|
"grad_norm": 0.04272560030221939, |
|
"learning_rate": 3.325e-05, |
|
"loss": 0.0057, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.336, |
|
"grad_norm": 0.047061894088983536, |
|
"learning_rate": 3.32e-05, |
|
"loss": 0.0048, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.337, |
|
"grad_norm": 0.032794494181871414, |
|
"learning_rate": 3.3150000000000006e-05, |
|
"loss": 0.0046, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.338, |
|
"grad_norm": 0.027148200199007988, |
|
"learning_rate": 3.3100000000000005e-05, |
|
"loss": 0.0046, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.339, |
|
"grad_norm": 0.035516317933797836, |
|
"learning_rate": 3.3050000000000004e-05, |
|
"loss": 0.0048, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.046294230967760086, |
|
"learning_rate": 3.3e-05, |
|
"loss": 0.0063, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.341, |
|
"grad_norm": 0.07840899378061295, |
|
"learning_rate": 3.295e-05, |
|
"loss": 0.0047, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.342, |
|
"grad_norm": 0.04392802715301514, |
|
"learning_rate": 3.29e-05, |
|
"loss": 0.0048, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.343, |
|
"grad_norm": 0.04237942770123482, |
|
"learning_rate": 3.2850000000000006e-05, |
|
"loss": 0.0047, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.344, |
|
"grad_norm": 0.03379204496741295, |
|
"learning_rate": 3.2800000000000004e-05, |
|
"loss": 0.005, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.345, |
|
"grad_norm": 0.3130718469619751, |
|
"learning_rate": 3.275e-05, |
|
"loss": 0.0108, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.346, |
|
"grad_norm": 0.13712112605571747, |
|
"learning_rate": 3.27e-05, |
|
"loss": 0.0095, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.347, |
|
"grad_norm": 0.12321494519710541, |
|
"learning_rate": 3.265e-05, |
|
"loss": 0.0075, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.348, |
|
"grad_norm": 0.06602940708398819, |
|
"learning_rate": 3.26e-05, |
|
"loss": 0.0062, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.349, |
|
"grad_norm": 0.08250287175178528, |
|
"learning_rate": 3.2550000000000005e-05, |
|
"loss": 0.0053, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.04638442397117615, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.0052, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.351, |
|
"grad_norm": 0.043373119086027145, |
|
"learning_rate": 3.245e-05, |
|
"loss": 0.0053, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.352, |
|
"grad_norm": 0.04895636439323425, |
|
"learning_rate": 3.24e-05, |
|
"loss": 0.0043, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.353, |
|
"grad_norm": 0.04256746545433998, |
|
"learning_rate": 3.235e-05, |
|
"loss": 0.0044, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.354, |
|
"grad_norm": 0.02967280149459839, |
|
"learning_rate": 3.2300000000000006e-05, |
|
"loss": 0.0046, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.355, |
|
"grad_norm": 0.02590947411954403, |
|
"learning_rate": 3.2250000000000005e-05, |
|
"loss": 0.0044, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.356, |
|
"grad_norm": 0.026240160688757896, |
|
"learning_rate": 3.2200000000000003e-05, |
|
"loss": 0.0041, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.357, |
|
"grad_norm": 0.048163361847400665, |
|
"learning_rate": 3.215e-05, |
|
"loss": 0.0048, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.358, |
|
"grad_norm": 0.04310280829668045, |
|
"learning_rate": 3.21e-05, |
|
"loss": 0.0044, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.359, |
|
"grad_norm": 0.027477843686938286, |
|
"learning_rate": 3.205e-05, |
|
"loss": 0.004, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.02773194946348667, |
|
"learning_rate": 3.2000000000000005e-05, |
|
"loss": 0.0046, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.361, |
|
"grad_norm": 0.027110638096928596, |
|
"learning_rate": 3.1950000000000004e-05, |
|
"loss": 0.004, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.362, |
|
"grad_norm": 0.04346521571278572, |
|
"learning_rate": 3.19e-05, |
|
"loss": 0.0039, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.363, |
|
"grad_norm": 0.024588119238615036, |
|
"learning_rate": 3.185e-05, |
|
"loss": 0.0041, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.364, |
|
"grad_norm": 0.03631160408258438, |
|
"learning_rate": 3.18e-05, |
|
"loss": 0.0039, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.365, |
|
"grad_norm": 0.028497323393821716, |
|
"learning_rate": 3.175e-05, |
|
"loss": 0.0045, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.366, |
|
"grad_norm": 0.06324070692062378, |
|
"learning_rate": 3.1700000000000005e-05, |
|
"loss": 0.004, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.367, |
|
"grad_norm": 0.22121182084083557, |
|
"learning_rate": 3.1650000000000004e-05, |
|
"loss": 0.0078, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.368, |
|
"grad_norm": 0.130402073264122, |
|
"learning_rate": 3.16e-05, |
|
"loss": 0.0068, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.369, |
|
"grad_norm": 0.11193361133337021, |
|
"learning_rate": 3.155e-05, |
|
"loss": 0.0052, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.043261051177978516, |
|
"learning_rate": 3.15e-05, |
|
"loss": 0.0046, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.371, |
|
"grad_norm": 0.05132100731134415, |
|
"learning_rate": 3.145e-05, |
|
"loss": 0.0041, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.372, |
|
"grad_norm": 0.03498228266835213, |
|
"learning_rate": 3.1400000000000004e-05, |
|
"loss": 0.0041, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.373, |
|
"grad_norm": 0.03594733029603958, |
|
"learning_rate": 3.135e-05, |
|
"loss": 0.0037, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.374, |
|
"grad_norm": 0.05542735382914543, |
|
"learning_rate": 3.13e-05, |
|
"loss": 0.0042, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.375, |
|
"grad_norm": 0.03302931785583496, |
|
"learning_rate": 3.125e-05, |
|
"loss": 0.0048, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.376, |
|
"grad_norm": 0.028392167761921883, |
|
"learning_rate": 3.12e-05, |
|
"loss": 0.0042, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.377, |
|
"grad_norm": 0.05274713411927223, |
|
"learning_rate": 3.115e-05, |
|
"loss": 0.0045, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.378, |
|
"grad_norm": 0.024890929460525513, |
|
"learning_rate": 3.1100000000000004e-05, |
|
"loss": 0.0038, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.379, |
|
"grad_norm": 0.02797631174325943, |
|
"learning_rate": 3.105e-05, |
|
"loss": 0.0036, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.033390454947948456, |
|
"learning_rate": 3.1e-05, |
|
"loss": 0.0039, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.381, |
|
"grad_norm": 0.024741416797041893, |
|
"learning_rate": 3.095e-05, |
|
"loss": 0.0042, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.382, |
|
"grad_norm": 0.05398337543010712, |
|
"learning_rate": 3.09e-05, |
|
"loss": 0.0049, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.383, |
|
"grad_norm": 0.027936646714806557, |
|
"learning_rate": 3.0850000000000004e-05, |
|
"loss": 0.0034, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.384, |
|
"grad_norm": 0.02413495071232319, |
|
"learning_rate": 3.08e-05, |
|
"loss": 0.0036, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.385, |
|
"grad_norm": 0.037689995020627975, |
|
"learning_rate": 3.075e-05, |
|
"loss": 0.0033, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.386, |
|
"grad_norm": 0.028174949809908867, |
|
"learning_rate": 3.07e-05, |
|
"loss": 0.0036, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.387, |
|
"grad_norm": 0.064354807138443, |
|
"learning_rate": 3.065e-05, |
|
"loss": 0.0037, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.388, |
|
"grad_norm": 0.028341595083475113, |
|
"learning_rate": 3.06e-05, |
|
"loss": 0.0034, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.389, |
|
"grad_norm": 0.06142325699329376, |
|
"learning_rate": 3.0550000000000004e-05, |
|
"loss": 0.004, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.03553822636604309, |
|
"learning_rate": 3.05e-05, |
|
"loss": 0.004, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.391, |
|
"grad_norm": 0.025645367801189423, |
|
"learning_rate": 3.045e-05, |
|
"loss": 0.0032, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.392, |
|
"grad_norm": 0.053947921842336655, |
|
"learning_rate": 3.04e-05, |
|
"loss": 0.0042, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.393, |
|
"grad_norm": 0.040126167237758636, |
|
"learning_rate": 3.035e-05, |
|
"loss": 0.0038, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.394, |
|
"grad_norm": 0.02956206165254116, |
|
"learning_rate": 3.03e-05, |
|
"loss": 0.0035, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.395, |
|
"grad_norm": 0.11952024698257446, |
|
"learning_rate": 3.025e-05, |
|
"loss": 0.0057, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.396, |
|
"grad_norm": 0.04155363142490387, |
|
"learning_rate": 3.02e-05, |
|
"loss": 0.0034, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.397, |
|
"grad_norm": 0.03884551301598549, |
|
"learning_rate": 3.015e-05, |
|
"loss": 0.0037, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.398, |
|
"grad_norm": 0.033869728446006775, |
|
"learning_rate": 3.01e-05, |
|
"loss": 0.0033, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.399, |
|
"grad_norm": 0.027508044615387917, |
|
"learning_rate": 3.0050000000000002e-05, |
|
"loss": 0.0038, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.019838711246848106, |
|
"learning_rate": 3e-05, |
|
"loss": 0.0034, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.401, |
|
"grad_norm": 0.042124535888433456, |
|
"learning_rate": 2.995e-05, |
|
"loss": 0.003, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.402, |
|
"grad_norm": 0.03583139553666115, |
|
"learning_rate": 2.9900000000000002e-05, |
|
"loss": 0.0029, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.403, |
|
"grad_norm": 0.024187223985791206, |
|
"learning_rate": 2.985e-05, |
|
"loss": 0.0031, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.404, |
|
"grad_norm": 0.02509123831987381, |
|
"learning_rate": 2.98e-05, |
|
"loss": 0.003, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.405, |
|
"grad_norm": 0.015798581764101982, |
|
"learning_rate": 2.975e-05, |
|
"loss": 0.003, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.406, |
|
"grad_norm": 0.01964486949145794, |
|
"learning_rate": 2.97e-05, |
|
"loss": 0.0032, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.407, |
|
"grad_norm": 0.025820232927799225, |
|
"learning_rate": 2.965e-05, |
|
"loss": 0.0032, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.408, |
|
"grad_norm": 0.03453589975833893, |
|
"learning_rate": 2.96e-05, |
|
"loss": 0.003, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.409, |
|
"grad_norm": 0.022311529144644737, |
|
"learning_rate": 2.955e-05, |
|
"loss": 0.0025, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.02296466939151287, |
|
"learning_rate": 2.95e-05, |
|
"loss": 0.003, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.411, |
|
"grad_norm": 0.022816313430666924, |
|
"learning_rate": 2.945e-05, |
|
"loss": 0.0032, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.412, |
|
"grad_norm": 0.021030904725193977, |
|
"learning_rate": 2.94e-05, |
|
"loss": 0.003, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.413, |
|
"grad_norm": 0.02336346171796322, |
|
"learning_rate": 2.935e-05, |
|
"loss": 0.0028, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.414, |
|
"grad_norm": 0.019582638517022133, |
|
"learning_rate": 2.93e-05, |
|
"loss": 0.0027, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.415, |
|
"grad_norm": 0.031429585069417953, |
|
"learning_rate": 2.925e-05, |
|
"loss": 0.0027, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.416, |
|
"grad_norm": 0.027804825454950333, |
|
"learning_rate": 2.9199999999999998e-05, |
|
"loss": 0.0027, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.417, |
|
"grad_norm": 0.022006656974554062, |
|
"learning_rate": 2.915e-05, |
|
"loss": 0.003, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.418, |
|
"grad_norm": 0.052478939294815063, |
|
"learning_rate": 2.91e-05, |
|
"loss": 0.0044, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.419, |
|
"grad_norm": 0.03854925185441971, |
|
"learning_rate": 2.9049999999999998e-05, |
|
"loss": 0.0036, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.02749469131231308, |
|
"learning_rate": 2.9e-05, |
|
"loss": 0.0033, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.421, |
|
"grad_norm": 0.01697971485555172, |
|
"learning_rate": 2.895e-05, |
|
"loss": 0.0032, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.422, |
|
"grad_norm": 0.05183997377753258, |
|
"learning_rate": 2.8899999999999998e-05, |
|
"loss": 0.0029, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.423, |
|
"grad_norm": 0.030102815479040146, |
|
"learning_rate": 2.885e-05, |
|
"loss": 0.0028, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.424, |
|
"grad_norm": 0.027241216972470284, |
|
"learning_rate": 2.88e-05, |
|
"loss": 0.0033, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.425, |
|
"grad_norm": 0.01855759136378765, |
|
"learning_rate": 2.8749999999999997e-05, |
|
"loss": 0.0024, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.426, |
|
"grad_norm": 0.019300928339362144, |
|
"learning_rate": 2.87e-05, |
|
"loss": 0.0024, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.427, |
|
"grad_norm": 0.01639522798359394, |
|
"learning_rate": 2.865e-05, |
|
"loss": 0.0026, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.428, |
|
"grad_norm": 0.027084793895483017, |
|
"learning_rate": 2.86e-05, |
|
"loss": 0.0027, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.429, |
|
"grad_norm": 0.021206015720963478, |
|
"learning_rate": 2.855e-05, |
|
"loss": 0.0025, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.0655827671289444, |
|
"learning_rate": 2.8499999999999998e-05, |
|
"loss": 0.0027, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.431, |
|
"grad_norm": 0.03779730945825577, |
|
"learning_rate": 2.845e-05, |
|
"loss": 0.0024, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.432, |
|
"grad_norm": 0.045267749577760696, |
|
"learning_rate": 2.84e-05, |
|
"loss": 0.0031, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.433, |
|
"grad_norm": 0.017473919317126274, |
|
"learning_rate": 2.8349999999999998e-05, |
|
"loss": 0.0027, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 0.434, |
|
"grad_norm": 0.019513197243213654, |
|
"learning_rate": 2.83e-05, |
|
"loss": 0.0024, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.435, |
|
"grad_norm": 0.01616765186190605, |
|
"learning_rate": 2.825e-05, |
|
"loss": 0.0025, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.436, |
|
"grad_norm": 0.02270474284887314, |
|
"learning_rate": 2.8199999999999998e-05, |
|
"loss": 0.0024, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.437, |
|
"grad_norm": 0.02363002672791481, |
|
"learning_rate": 2.815e-05, |
|
"loss": 0.0025, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 0.438, |
|
"grad_norm": 0.023898936808109283, |
|
"learning_rate": 2.8100000000000005e-05, |
|
"loss": 0.0023, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.439, |
|
"grad_norm": 0.01270793005824089, |
|
"learning_rate": 2.8050000000000004e-05, |
|
"loss": 0.0022, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.03917006403207779, |
|
"learning_rate": 2.8000000000000003e-05, |
|
"loss": 0.0029, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.441, |
|
"grad_norm": 0.028284449130296707, |
|
"learning_rate": 2.7950000000000005e-05, |
|
"loss": 0.0023, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.442, |
|
"grad_norm": 0.017493903636932373, |
|
"learning_rate": 2.7900000000000004e-05, |
|
"loss": 0.0022, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.443, |
|
"grad_norm": 0.03160572797060013, |
|
"learning_rate": 2.7850000000000003e-05, |
|
"loss": 0.0025, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 0.444, |
|
"grad_norm": 0.022926049306988716, |
|
"learning_rate": 2.7800000000000005e-05, |
|
"loss": 0.0024, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.445, |
|
"grad_norm": 0.032902974635362625, |
|
"learning_rate": 2.7750000000000004e-05, |
|
"loss": 0.0025, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.446, |
|
"grad_norm": 0.017781972885131836, |
|
"learning_rate": 2.7700000000000002e-05, |
|
"loss": 0.0022, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.447, |
|
"grad_norm": 0.1209416389465332, |
|
"learning_rate": 2.7650000000000005e-05, |
|
"loss": 0.0028, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 0.448, |
|
"grad_norm": 0.03747338801622391, |
|
"learning_rate": 2.7600000000000003e-05, |
|
"loss": 0.0024, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.449, |
|
"grad_norm": 0.04210735112428665, |
|
"learning_rate": 2.7550000000000002e-05, |
|
"loss": 0.0024, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.02306324429810047, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.0022, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.451, |
|
"grad_norm": 0.027622856199741364, |
|
"learning_rate": 2.7450000000000003e-05, |
|
"loss": 0.0026, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 0.452, |
|
"grad_norm": 0.014202162623405457, |
|
"learning_rate": 2.7400000000000002e-05, |
|
"loss": 0.0022, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.453, |
|
"grad_norm": 0.051466915756464005, |
|
"learning_rate": 2.7350000000000004e-05, |
|
"loss": 0.0031, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 0.454, |
|
"grad_norm": 0.052768610417842865, |
|
"learning_rate": 2.7300000000000003e-05, |
|
"loss": 0.0028, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.455, |
|
"grad_norm": 0.02291076071560383, |
|
"learning_rate": 2.725e-05, |
|
"loss": 0.0023, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.456, |
|
"grad_norm": 0.027942989021539688, |
|
"learning_rate": 2.7200000000000004e-05, |
|
"loss": 0.0023, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.457, |
|
"grad_norm": 0.01529670413583517, |
|
"learning_rate": 2.7150000000000003e-05, |
|
"loss": 0.0024, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 0.458, |
|
"grad_norm": 0.02945224940776825, |
|
"learning_rate": 2.7100000000000005e-05, |
|
"loss": 0.0024, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.459, |
|
"grad_norm": 0.027197351679205894, |
|
"learning_rate": 2.7050000000000004e-05, |
|
"loss": 0.0024, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.022022951394319534, |
|
"learning_rate": 2.7000000000000002e-05, |
|
"loss": 0.0023, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.461, |
|
"grad_norm": 0.019739823415875435, |
|
"learning_rate": 2.6950000000000005e-05, |
|
"loss": 0.0022, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 0.462, |
|
"grad_norm": 0.06794995814561844, |
|
"learning_rate": 2.6900000000000003e-05, |
|
"loss": 0.0027, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.463, |
|
"grad_norm": 0.049228962510824203, |
|
"learning_rate": 2.6850000000000002e-05, |
|
"loss": 0.0026, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 0.464, |
|
"grad_norm": 0.0241558700799942, |
|
"learning_rate": 2.6800000000000004e-05, |
|
"loss": 0.0021, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.465, |
|
"grad_norm": 0.024576248601078987, |
|
"learning_rate": 2.6750000000000003e-05, |
|
"loss": 0.0022, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.466, |
|
"grad_norm": 0.030337205156683922, |
|
"learning_rate": 2.6700000000000002e-05, |
|
"loss": 0.0024, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.467, |
|
"grad_norm": 0.015081087127327919, |
|
"learning_rate": 2.6650000000000004e-05, |
|
"loss": 0.002, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 0.468, |
|
"grad_norm": 0.026368912309408188, |
|
"learning_rate": 2.6600000000000003e-05, |
|
"loss": 0.0022, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.469, |
|
"grad_norm": 0.018447600305080414, |
|
"learning_rate": 2.655e-05, |
|
"loss": 0.0022, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.018314722925424576, |
|
"learning_rate": 2.6500000000000004e-05, |
|
"loss": 0.0019, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.471, |
|
"grad_norm": 0.02361704409122467, |
|
"learning_rate": 2.6450000000000003e-05, |
|
"loss": 0.0023, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 0.472, |
|
"grad_norm": 0.02032247930765152, |
|
"learning_rate": 2.64e-05, |
|
"loss": 0.002, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.473, |
|
"grad_norm": 0.017889728769659996, |
|
"learning_rate": 2.6350000000000004e-05, |
|
"loss": 0.0019, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 0.474, |
|
"grad_norm": 0.01962173730134964, |
|
"learning_rate": 2.6300000000000002e-05, |
|
"loss": 0.0019, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.475, |
|
"grad_norm": 0.015778113156557083, |
|
"learning_rate": 2.625e-05, |
|
"loss": 0.0021, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.476, |
|
"grad_norm": 0.01894952729344368, |
|
"learning_rate": 2.6200000000000003e-05, |
|
"loss": 0.0023, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.477, |
|
"grad_norm": 0.0239462498575449, |
|
"learning_rate": 2.6150000000000002e-05, |
|
"loss": 0.002, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 0.478, |
|
"grad_norm": 0.025406278669834137, |
|
"learning_rate": 2.61e-05, |
|
"loss": 0.0021, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.479, |
|
"grad_norm": 0.01813661865890026, |
|
"learning_rate": 2.6050000000000003e-05, |
|
"loss": 0.0023, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.01979188807308674, |
|
"learning_rate": 2.6000000000000002e-05, |
|
"loss": 0.0019, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.481, |
|
"grad_norm": 0.02219184674322605, |
|
"learning_rate": 2.595e-05, |
|
"loss": 0.002, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 0.482, |
|
"grad_norm": 0.012867514044046402, |
|
"learning_rate": 2.5900000000000003e-05, |
|
"loss": 0.0017, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.483, |
|
"grad_norm": 0.014178570359945297, |
|
"learning_rate": 2.585e-05, |
|
"loss": 0.0023, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 0.484, |
|
"grad_norm": 0.013582895509898663, |
|
"learning_rate": 2.58e-05, |
|
"loss": 0.002, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.485, |
|
"grad_norm": 0.02718137763440609, |
|
"learning_rate": 2.5750000000000002e-05, |
|
"loss": 0.0019, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.486, |
|
"grad_norm": 0.016559738665819168, |
|
"learning_rate": 2.57e-05, |
|
"loss": 0.0019, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.487, |
|
"grad_norm": 0.01447515282779932, |
|
"learning_rate": 2.5650000000000003e-05, |
|
"loss": 0.0017, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 0.488, |
|
"grad_norm": 0.010251802392303944, |
|
"learning_rate": 2.5600000000000002e-05, |
|
"loss": 0.0017, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.489, |
|
"grad_norm": 0.021138856187462807, |
|
"learning_rate": 2.555e-05, |
|
"loss": 0.0023, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.026664163917303085, |
|
"learning_rate": 2.5500000000000003e-05, |
|
"loss": 0.0021, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.491, |
|
"grad_norm": 0.012794408947229385, |
|
"learning_rate": 2.5450000000000002e-05, |
|
"loss": 0.0017, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 0.492, |
|
"grad_norm": 0.013725240714848042, |
|
"learning_rate": 2.54e-05, |
|
"loss": 0.0018, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.493, |
|
"grad_norm": 0.01432815007865429, |
|
"learning_rate": 2.5350000000000003e-05, |
|
"loss": 0.0017, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 0.494, |
|
"grad_norm": 0.014761424623429775, |
|
"learning_rate": 2.5300000000000002e-05, |
|
"loss": 0.0019, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.495, |
|
"grad_norm": 0.06742983311414719, |
|
"learning_rate": 2.525e-05, |
|
"loss": 0.0025, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.496, |
|
"grad_norm": 0.024261673912405968, |
|
"learning_rate": 2.5200000000000003e-05, |
|
"loss": 0.0021, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.497, |
|
"grad_norm": 0.02116272784769535, |
|
"learning_rate": 2.515e-05, |
|
"loss": 0.002, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 0.498, |
|
"grad_norm": 0.014996570535004139, |
|
"learning_rate": 2.51e-05, |
|
"loss": 0.0017, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.499, |
|
"grad_norm": 0.014970551244914532, |
|
"learning_rate": 2.5050000000000002e-05, |
|
"loss": 0.002, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.01756688393652439, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.002, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.501, |
|
"grad_norm": 0.012683290056884289, |
|
"learning_rate": 2.495e-05, |
|
"loss": 0.0016, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 0.502, |
|
"grad_norm": 0.011495651677250862, |
|
"learning_rate": 2.4900000000000002e-05, |
|
"loss": 0.0016, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 0.503, |
|
"grad_norm": 0.014306634664535522, |
|
"learning_rate": 2.485e-05, |
|
"loss": 0.0018, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 0.504, |
|
"grad_norm": 0.02241896465420723, |
|
"learning_rate": 2.48e-05, |
|
"loss": 0.0021, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 0.505, |
|
"grad_norm": 0.017740361392498016, |
|
"learning_rate": 2.4750000000000002e-05, |
|
"loss": 0.0016, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.506, |
|
"grad_norm": 0.013199679553508759, |
|
"learning_rate": 2.47e-05, |
|
"loss": 0.0015, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 0.507, |
|
"grad_norm": 0.057298243045806885, |
|
"learning_rate": 2.465e-05, |
|
"loss": 0.0019, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 0.508, |
|
"grad_norm": 0.03238265961408615, |
|
"learning_rate": 2.46e-05, |
|
"loss": 0.0026, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 0.509, |
|
"grad_norm": 0.04820936918258667, |
|
"learning_rate": 2.455e-05, |
|
"loss": 0.0027, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.022526515647768974, |
|
"learning_rate": 2.45e-05, |
|
"loss": 0.0018, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.511, |
|
"grad_norm": 0.1899888962507248, |
|
"learning_rate": 2.445e-05, |
|
"loss": 0.0026, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 0.512, |
|
"grad_norm": 0.05366889387369156, |
|
"learning_rate": 2.44e-05, |
|
"loss": 0.003, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.513, |
|
"grad_norm": 0.028939131647348404, |
|
"learning_rate": 2.435e-05, |
|
"loss": 0.0021, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 0.514, |
|
"grad_norm": 0.023352844640612602, |
|
"learning_rate": 2.43e-05, |
|
"loss": 0.0019, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 0.515, |
|
"grad_norm": 0.015283104963600636, |
|
"learning_rate": 2.425e-05, |
|
"loss": 0.0017, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.516, |
|
"grad_norm": 0.0149134686216712, |
|
"learning_rate": 2.4200000000000002e-05, |
|
"loss": 0.0016, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 0.517, |
|
"grad_norm": 0.01739874854683876, |
|
"learning_rate": 2.415e-05, |
|
"loss": 0.0021, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 0.518, |
|
"grad_norm": 0.012562318705022335, |
|
"learning_rate": 2.41e-05, |
|
"loss": 0.0016, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 0.519, |
|
"grad_norm": 0.01181173324584961, |
|
"learning_rate": 2.4050000000000002e-05, |
|
"loss": 0.0017, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.0216183140873909, |
|
"learning_rate": 2.4e-05, |
|
"loss": 0.0017, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.521, |
|
"grad_norm": 0.014552557840943336, |
|
"learning_rate": 2.395e-05, |
|
"loss": 0.0017, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 0.522, |
|
"grad_norm": 0.013402258977293968, |
|
"learning_rate": 2.39e-05, |
|
"loss": 0.0015, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 0.523, |
|
"grad_norm": 0.017692307010293007, |
|
"learning_rate": 2.385e-05, |
|
"loss": 0.0017, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 0.524, |
|
"grad_norm": 0.007425515912473202, |
|
"learning_rate": 2.38e-05, |
|
"loss": 0.0015, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 0.525, |
|
"grad_norm": 0.010397032834589481, |
|
"learning_rate": 2.375e-05, |
|
"loss": 0.0014, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.526, |
|
"grad_norm": 0.013170558027923107, |
|
"learning_rate": 2.37e-05, |
|
"loss": 0.0017, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 0.527, |
|
"grad_norm": 0.47324055433273315, |
|
"learning_rate": 2.365e-05, |
|
"loss": 0.0037, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 0.528, |
|
"grad_norm": 0.06395496428012848, |
|
"learning_rate": 2.36e-05, |
|
"loss": 0.003, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.529, |
|
"grad_norm": 0.032293129712343216, |
|
"learning_rate": 2.355e-05, |
|
"loss": 0.0022, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.021514760330319405, |
|
"learning_rate": 2.35e-05, |
|
"loss": 0.002, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.531, |
|
"grad_norm": 0.016594447195529938, |
|
"learning_rate": 2.345e-05, |
|
"loss": 0.002, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 0.532, |
|
"grad_norm": 0.020661164075136185, |
|
"learning_rate": 2.3400000000000003e-05, |
|
"loss": 0.0018, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 0.533, |
|
"grad_norm": 0.01472094189375639, |
|
"learning_rate": 2.3350000000000002e-05, |
|
"loss": 0.0022, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 0.534, |
|
"grad_norm": 0.014501375146210194, |
|
"learning_rate": 2.3300000000000004e-05, |
|
"loss": 0.0017, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 0.535, |
|
"grad_norm": 0.01241264771670103, |
|
"learning_rate": 2.3250000000000003e-05, |
|
"loss": 0.0015, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.536, |
|
"grad_norm": 0.015589526854455471, |
|
"learning_rate": 2.32e-05, |
|
"loss": 0.0018, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 0.537, |
|
"grad_norm": 0.013468182645738125, |
|
"learning_rate": 2.3150000000000004e-05, |
|
"loss": 0.0018, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 0.538, |
|
"grad_norm": 0.015258733183145523, |
|
"learning_rate": 2.3100000000000002e-05, |
|
"loss": 0.0015, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 0.539, |
|
"grad_norm": 0.010932616889476776, |
|
"learning_rate": 2.305e-05, |
|
"loss": 0.0014, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.0102313794195652, |
|
"learning_rate": 2.3000000000000003e-05, |
|
"loss": 0.0014, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.541, |
|
"grad_norm": 0.00674120569601655, |
|
"learning_rate": 2.2950000000000002e-05, |
|
"loss": 0.0014, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 0.542, |
|
"grad_norm": 0.015179513022303581, |
|
"learning_rate": 2.29e-05, |
|
"loss": 0.0014, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 0.543, |
|
"grad_norm": 0.03448422998189926, |
|
"learning_rate": 2.2850000000000003e-05, |
|
"loss": 0.0019, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 0.544, |
|
"grad_norm": 0.028603358194231987, |
|
"learning_rate": 2.2800000000000002e-05, |
|
"loss": 0.0019, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.545, |
|
"grad_norm": 0.014372209087014198, |
|
"learning_rate": 2.275e-05, |
|
"loss": 0.0016, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.546, |
|
"grad_norm": 0.031532082706689835, |
|
"learning_rate": 2.2700000000000003e-05, |
|
"loss": 0.0017, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 0.547, |
|
"grad_norm": 0.018091056495904922, |
|
"learning_rate": 2.265e-05, |
|
"loss": 0.0016, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 0.548, |
|
"grad_norm": 0.014843069948256016, |
|
"learning_rate": 2.26e-05, |
|
"loss": 0.0015, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 0.549, |
|
"grad_norm": 0.011632148176431656, |
|
"learning_rate": 2.2550000000000003e-05, |
|
"loss": 0.0014, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.009511668235063553, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.0014, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.551, |
|
"grad_norm": 0.007981637492775917, |
|
"learning_rate": 2.245e-05, |
|
"loss": 0.0014, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 0.552, |
|
"grad_norm": 0.021288806572556496, |
|
"learning_rate": 2.2400000000000002e-05, |
|
"loss": 0.0015, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 0.553, |
|
"grad_norm": 0.01468642894178629, |
|
"learning_rate": 2.235e-05, |
|
"loss": 0.0018, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 0.554, |
|
"grad_norm": 0.011532713659107685, |
|
"learning_rate": 2.23e-05, |
|
"loss": 0.0012, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 0.555, |
|
"grad_norm": 0.00889046210795641, |
|
"learning_rate": 2.2250000000000002e-05, |
|
"loss": 0.0011, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.556, |
|
"grad_norm": 0.01401284895837307, |
|
"learning_rate": 2.22e-05, |
|
"loss": 0.0014, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 0.557, |
|
"grad_norm": 0.012369327247142792, |
|
"learning_rate": 2.215e-05, |
|
"loss": 0.0015, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 0.558, |
|
"grad_norm": 0.015258446335792542, |
|
"learning_rate": 2.2100000000000002e-05, |
|
"loss": 0.0015, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 0.559, |
|
"grad_norm": 0.009015046060085297, |
|
"learning_rate": 2.205e-05, |
|
"loss": 0.0012, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.011163819581270218, |
|
"learning_rate": 2.2000000000000003e-05, |
|
"loss": 0.0012, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.561, |
|
"grad_norm": 0.016389524564146996, |
|
"learning_rate": 2.195e-05, |
|
"loss": 0.0016, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 0.562, |
|
"grad_norm": 0.01325678639113903, |
|
"learning_rate": 2.19e-05, |
|
"loss": 0.0013, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 0.563, |
|
"grad_norm": 0.017966121435165405, |
|
"learning_rate": 2.1850000000000003e-05, |
|
"loss": 0.0014, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 0.564, |
|
"grad_norm": 0.012039076536893845, |
|
"learning_rate": 2.18e-05, |
|
"loss": 0.0013, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 0.565, |
|
"grad_norm": 0.006665175314992666, |
|
"learning_rate": 2.175e-05, |
|
"loss": 0.0012, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.566, |
|
"grad_norm": 0.0105441864579916, |
|
"learning_rate": 2.1700000000000002e-05, |
|
"loss": 0.0014, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 0.567, |
|
"grad_norm": 0.007554101757705212, |
|
"learning_rate": 2.165e-05, |
|
"loss": 0.0011, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 0.568, |
|
"grad_norm": 0.009823901578783989, |
|
"learning_rate": 2.16e-05, |
|
"loss": 0.0013, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 0.569, |
|
"grad_norm": 0.01720455475151539, |
|
"learning_rate": 2.1550000000000002e-05, |
|
"loss": 0.0015, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.01107338909059763, |
|
"learning_rate": 2.15e-05, |
|
"loss": 0.0012, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.571, |
|
"grad_norm": 0.01756761223077774, |
|
"learning_rate": 2.145e-05, |
|
"loss": 0.0014, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 0.572, |
|
"grad_norm": 0.022118983790278435, |
|
"learning_rate": 2.1400000000000002e-05, |
|
"loss": 0.0015, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 0.573, |
|
"grad_norm": 0.01616830937564373, |
|
"learning_rate": 2.135e-05, |
|
"loss": 0.0014, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 0.574, |
|
"grad_norm": 0.020481310784816742, |
|
"learning_rate": 2.13e-05, |
|
"loss": 0.0023, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 0.575, |
|
"grad_norm": 0.018176857382059097, |
|
"learning_rate": 2.125e-05, |
|
"loss": 0.0015, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.576, |
|
"grad_norm": 0.011317101307213306, |
|
"learning_rate": 2.12e-05, |
|
"loss": 0.0012, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.577, |
|
"grad_norm": 0.028791502118110657, |
|
"learning_rate": 2.115e-05, |
|
"loss": 0.0014, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 0.578, |
|
"grad_norm": 0.013037024065852165, |
|
"learning_rate": 2.11e-05, |
|
"loss": 0.0013, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 0.579, |
|
"grad_norm": 0.021426070481538773, |
|
"learning_rate": 2.105e-05, |
|
"loss": 0.0015, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.012033521197736263, |
|
"learning_rate": 2.1e-05, |
|
"loss": 0.0011, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.581, |
|
"grad_norm": 0.014337443746626377, |
|
"learning_rate": 2.095e-05, |
|
"loss": 0.0012, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 0.582, |
|
"grad_norm": 0.008603113703429699, |
|
"learning_rate": 2.09e-05, |
|
"loss": 0.0011, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 0.583, |
|
"grad_norm": 0.025418557226657867, |
|
"learning_rate": 2.085e-05, |
|
"loss": 0.0014, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 0.584, |
|
"grad_norm": 0.008621426299214363, |
|
"learning_rate": 2.08e-05, |
|
"loss": 0.0011, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 0.585, |
|
"grad_norm": 0.009969389997422695, |
|
"learning_rate": 2.075e-05, |
|
"loss": 0.0015, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 0.586, |
|
"grad_norm": 0.00997992418706417, |
|
"learning_rate": 2.07e-05, |
|
"loss": 0.0011, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 0.587, |
|
"grad_norm": 0.019949181005358696, |
|
"learning_rate": 2.065e-05, |
|
"loss": 0.001, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 0.588, |
|
"grad_norm": 0.009619793854653835, |
|
"learning_rate": 2.06e-05, |
|
"loss": 0.0011, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 0.589, |
|
"grad_norm": 0.007747489493340254, |
|
"learning_rate": 2.055e-05, |
|
"loss": 0.0012, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.01052554789930582, |
|
"learning_rate": 2.05e-05, |
|
"loss": 0.0014, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.591, |
|
"grad_norm": 0.014904200099408627, |
|
"learning_rate": 2.045e-05, |
|
"loss": 0.0012, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 0.592, |
|
"grad_norm": 0.00679561635479331, |
|
"learning_rate": 2.04e-05, |
|
"loss": 0.0011, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 0.593, |
|
"grad_norm": 0.006072670221328735, |
|
"learning_rate": 2.035e-05, |
|
"loss": 0.0011, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 0.594, |
|
"grad_norm": 0.014733157120645046, |
|
"learning_rate": 2.0300000000000002e-05, |
|
"loss": 0.0011, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 0.595, |
|
"grad_norm": 0.015511419624090195, |
|
"learning_rate": 2.025e-05, |
|
"loss": 0.0016, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 0.596, |
|
"grad_norm": 0.010620438493788242, |
|
"learning_rate": 2.0200000000000003e-05, |
|
"loss": 0.0012, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 0.597, |
|
"grad_norm": 0.0075794099830091, |
|
"learning_rate": 2.0150000000000002e-05, |
|
"loss": 0.0011, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 0.598, |
|
"grad_norm": 0.007882976904511452, |
|
"learning_rate": 2.01e-05, |
|
"loss": 0.0011, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 0.599, |
|
"grad_norm": 0.011548763141036034, |
|
"learning_rate": 2.0050000000000003e-05, |
|
"loss": 0.0013, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.0084703853353858, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0011, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.601, |
|
"grad_norm": 0.007603704463690519, |
|
"learning_rate": 1.995e-05, |
|
"loss": 0.001, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 0.602, |
|
"grad_norm": 0.008562711998820305, |
|
"learning_rate": 1.9900000000000003e-05, |
|
"loss": 0.0012, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 0.603, |
|
"grad_norm": 0.007590813562273979, |
|
"learning_rate": 1.985e-05, |
|
"loss": 0.001, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 0.604, |
|
"grad_norm": 0.020342741161584854, |
|
"learning_rate": 1.9800000000000004e-05, |
|
"loss": 0.0017, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 0.605, |
|
"grad_norm": 0.16912633180618286, |
|
"learning_rate": 1.9750000000000002e-05, |
|
"loss": 0.0089, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 0.606, |
|
"grad_norm": 0.08793429285287857, |
|
"learning_rate": 1.97e-05, |
|
"loss": 0.0027, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 0.607, |
|
"grad_norm": 0.05196760594844818, |
|
"learning_rate": 1.9650000000000003e-05, |
|
"loss": 0.0022, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 0.608, |
|
"grad_norm": 0.02118327096104622, |
|
"learning_rate": 1.9600000000000002e-05, |
|
"loss": 0.0021, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.609, |
|
"grad_norm": 0.013289586640894413, |
|
"learning_rate": 1.955e-05, |
|
"loss": 0.0013, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.012911707162857056, |
|
"learning_rate": 1.9500000000000003e-05, |
|
"loss": 0.0013, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.611, |
|
"grad_norm": 0.018663186579942703, |
|
"learning_rate": 1.9450000000000002e-05, |
|
"loss": 0.0012, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 0.612, |
|
"grad_norm": 0.010551884770393372, |
|
"learning_rate": 1.94e-05, |
|
"loss": 0.0012, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 0.613, |
|
"grad_norm": 0.015853077173233032, |
|
"learning_rate": 1.9350000000000003e-05, |
|
"loss": 0.0013, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 0.614, |
|
"grad_norm": 0.020374910905957222, |
|
"learning_rate": 1.93e-05, |
|
"loss": 0.001, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 0.615, |
|
"grad_norm": 0.015159848146140575, |
|
"learning_rate": 1.925e-05, |
|
"loss": 0.0013, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 0.616, |
|
"grad_norm": 0.007991676218807697, |
|
"learning_rate": 1.9200000000000003e-05, |
|
"loss": 0.0013, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 0.617, |
|
"grad_norm": 0.007849587127566338, |
|
"learning_rate": 1.915e-05, |
|
"loss": 0.0011, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 0.618, |
|
"grad_norm": 0.022048622369766235, |
|
"learning_rate": 1.91e-05, |
|
"loss": 0.001, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 0.619, |
|
"grad_norm": 0.021215343847870827, |
|
"learning_rate": 1.9050000000000002e-05, |
|
"loss": 0.0011, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.012288344092667103, |
|
"learning_rate": 1.9e-05, |
|
"loss": 0.0012, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.621, |
|
"grad_norm": 0.020313331857323647, |
|
"learning_rate": 1.895e-05, |
|
"loss": 0.0011, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 0.622, |
|
"grad_norm": 0.008762447163462639, |
|
"learning_rate": 1.8900000000000002e-05, |
|
"loss": 0.001, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 0.623, |
|
"grad_norm": 0.0247616209089756, |
|
"learning_rate": 1.885e-05, |
|
"loss": 0.0011, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 0.624, |
|
"grad_norm": 0.09021363407373428, |
|
"learning_rate": 1.88e-05, |
|
"loss": 0.0016, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 0.625, |
|
"grad_norm": 0.017945896834135056, |
|
"learning_rate": 1.8750000000000002e-05, |
|
"loss": 0.0011, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 0.626, |
|
"grad_norm": 0.011303462088108063, |
|
"learning_rate": 1.87e-05, |
|
"loss": 0.0011, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 0.627, |
|
"grad_norm": 0.008381664752960205, |
|
"learning_rate": 1.865e-05, |
|
"loss": 0.0011, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 0.628, |
|
"grad_norm": 0.011003987863659859, |
|
"learning_rate": 1.86e-05, |
|
"loss": 0.0012, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 0.629, |
|
"grad_norm": 0.015965888276696205, |
|
"learning_rate": 1.855e-05, |
|
"loss": 0.001, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.006507181562483311, |
|
"learning_rate": 1.85e-05, |
|
"loss": 0.0009, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.631, |
|
"grad_norm": 0.015577591024339199, |
|
"learning_rate": 1.845e-05, |
|
"loss": 0.001, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 0.632, |
|
"grad_norm": 0.006741558667272329, |
|
"learning_rate": 1.84e-05, |
|
"loss": 0.0011, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 0.633, |
|
"grad_norm": 0.016030525788664818, |
|
"learning_rate": 1.8350000000000002e-05, |
|
"loss": 0.001, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 0.634, |
|
"grad_norm": 0.010763168334960938, |
|
"learning_rate": 1.83e-05, |
|
"loss": 0.0011, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 0.635, |
|
"grad_norm": 0.017273874953389168, |
|
"learning_rate": 1.825e-05, |
|
"loss": 0.001, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 0.636, |
|
"grad_norm": 0.010964670218527317, |
|
"learning_rate": 1.8200000000000002e-05, |
|
"loss": 0.0011, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 0.637, |
|
"grad_norm": 0.00803497713059187, |
|
"learning_rate": 1.815e-05, |
|
"loss": 0.0009, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 0.638, |
|
"grad_norm": 0.007479315157979727, |
|
"learning_rate": 1.81e-05, |
|
"loss": 0.0014, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 0.639, |
|
"grad_norm": 0.010598058812320232, |
|
"learning_rate": 1.805e-05, |
|
"loss": 0.001, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.009770036675035954, |
|
"learning_rate": 1.8e-05, |
|
"loss": 0.0009, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.641, |
|
"grad_norm": 0.011602561920881271, |
|
"learning_rate": 1.795e-05, |
|
"loss": 0.0008, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 0.642, |
|
"grad_norm": 0.0076597342267632484, |
|
"learning_rate": 1.79e-05, |
|
"loss": 0.0008, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 0.643, |
|
"grad_norm": 0.012248953804373741, |
|
"learning_rate": 1.785e-05, |
|
"loss": 0.0008, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 0.644, |
|
"grad_norm": 0.005626557394862175, |
|
"learning_rate": 1.78e-05, |
|
"loss": 0.0008, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 0.645, |
|
"grad_norm": 0.005482000298798084, |
|
"learning_rate": 1.775e-05, |
|
"loss": 0.0008, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 0.646, |
|
"grad_norm": 0.007456011138856411, |
|
"learning_rate": 1.77e-05, |
|
"loss": 0.0008, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 0.647, |
|
"grad_norm": 0.008909308351576328, |
|
"learning_rate": 1.765e-05, |
|
"loss": 0.0008, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 0.648, |
|
"grad_norm": 0.011135280132293701, |
|
"learning_rate": 1.76e-05, |
|
"loss": 0.0009, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 0.649, |
|
"grad_norm": 0.01595783233642578, |
|
"learning_rate": 1.755e-05, |
|
"loss": 0.001, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.013902807608246803, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.0011, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.651, |
|
"grad_norm": 0.010244622826576233, |
|
"learning_rate": 1.745e-05, |
|
"loss": 0.0009, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 0.652, |
|
"grad_norm": 0.007476091384887695, |
|
"learning_rate": 1.74e-05, |
|
"loss": 0.0009, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 0.653, |
|
"grad_norm": 0.013044660910964012, |
|
"learning_rate": 1.7349999999999998e-05, |
|
"loss": 0.0009, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 0.654, |
|
"grad_norm": 0.004804369527846575, |
|
"learning_rate": 1.73e-05, |
|
"loss": 0.0009, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 0.655, |
|
"grad_norm": 0.006042002234607935, |
|
"learning_rate": 1.725e-05, |
|
"loss": 0.0008, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 0.656, |
|
"grad_norm": 0.010785943828523159, |
|
"learning_rate": 1.7199999999999998e-05, |
|
"loss": 0.0009, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 0.657, |
|
"grad_norm": 0.011350172571837902, |
|
"learning_rate": 1.7150000000000004e-05, |
|
"loss": 0.0008, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 0.658, |
|
"grad_norm": 0.007638021372258663, |
|
"learning_rate": 1.7100000000000002e-05, |
|
"loss": 0.0009, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 0.659, |
|
"grad_norm": 0.005735939834266901, |
|
"learning_rate": 1.705e-05, |
|
"loss": 0.0009, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.02717960625886917, |
|
"learning_rate": 1.7000000000000003e-05, |
|
"loss": 0.0011, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.661, |
|
"grad_norm": 0.006012643221765757, |
|
"learning_rate": 1.6950000000000002e-05, |
|
"loss": 0.0008, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 0.662, |
|
"grad_norm": 0.00599683728069067, |
|
"learning_rate": 1.69e-05, |
|
"loss": 0.0008, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 0.663, |
|
"grad_norm": 0.026952974498271942, |
|
"learning_rate": 1.6850000000000003e-05, |
|
"loss": 0.0008, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 0.664, |
|
"grad_norm": 0.008171536959707737, |
|
"learning_rate": 1.6800000000000002e-05, |
|
"loss": 0.0008, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 0.665, |
|
"grad_norm": 0.007446442265063524, |
|
"learning_rate": 1.675e-05, |
|
"loss": 0.0009, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 0.666, |
|
"grad_norm": 0.006456063129007816, |
|
"learning_rate": 1.6700000000000003e-05, |
|
"loss": 0.0008, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 0.667, |
|
"grad_norm": 0.008162173442542553, |
|
"learning_rate": 1.665e-05, |
|
"loss": 0.0007, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 0.668, |
|
"grad_norm": 0.004432919900864363, |
|
"learning_rate": 1.66e-05, |
|
"loss": 0.0008, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 0.669, |
|
"grad_norm": 0.007158307824283838, |
|
"learning_rate": 1.6550000000000002e-05, |
|
"loss": 0.0008, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.003983801696449518, |
|
"learning_rate": 1.65e-05, |
|
"loss": 0.0007, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.671, |
|
"grad_norm": 0.005170087795704603, |
|
"learning_rate": 1.645e-05, |
|
"loss": 0.0008, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 0.672, |
|
"grad_norm": 0.004729804117232561, |
|
"learning_rate": 1.6400000000000002e-05, |
|
"loss": 0.0008, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 0.673, |
|
"grad_norm": 0.010037174448370934, |
|
"learning_rate": 1.635e-05, |
|
"loss": 0.001, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 0.674, |
|
"grad_norm": 0.050949569791555405, |
|
"learning_rate": 1.63e-05, |
|
"loss": 0.0023, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 0.675, |
|
"grad_norm": 0.0323474146425724, |
|
"learning_rate": 1.6250000000000002e-05, |
|
"loss": 0.0017, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 0.676, |
|
"grad_norm": 0.027231359854340553, |
|
"learning_rate": 1.62e-05, |
|
"loss": 0.0021, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 0.677, |
|
"grad_norm": 0.01555855292826891, |
|
"learning_rate": 1.6150000000000003e-05, |
|
"loss": 0.0013, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 0.678, |
|
"grad_norm": 0.01804298162460327, |
|
"learning_rate": 1.6100000000000002e-05, |
|
"loss": 0.0011, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 0.679, |
|
"grad_norm": 0.011248771101236343, |
|
"learning_rate": 1.605e-05, |
|
"loss": 0.0011, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.007389044389128685, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 0.0009, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.681, |
|
"grad_norm": 0.014606145210564137, |
|
"learning_rate": 1.595e-05, |
|
"loss": 0.0012, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 0.682, |
|
"grad_norm": 0.012476052157580853, |
|
"learning_rate": 1.59e-05, |
|
"loss": 0.0009, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 0.683, |
|
"grad_norm": 0.009272475726902485, |
|
"learning_rate": 1.5850000000000002e-05, |
|
"loss": 0.0009, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 0.684, |
|
"grad_norm": 0.011705187149345875, |
|
"learning_rate": 1.58e-05, |
|
"loss": 0.0009, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 0.685, |
|
"grad_norm": 0.01874556578695774, |
|
"learning_rate": 1.575e-05, |
|
"loss": 0.0011, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 0.686, |
|
"grad_norm": 0.01463324110955, |
|
"learning_rate": 1.5700000000000002e-05, |
|
"loss": 0.0009, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 0.687, |
|
"grad_norm": 0.012001392431557178, |
|
"learning_rate": 1.565e-05, |
|
"loss": 0.001, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 0.688, |
|
"grad_norm": 0.009366356767714024, |
|
"learning_rate": 1.56e-05, |
|
"loss": 0.0008, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 0.689, |
|
"grad_norm": 0.010064000263810158, |
|
"learning_rate": 1.5550000000000002e-05, |
|
"loss": 0.0009, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.016703909263014793, |
|
"learning_rate": 1.55e-05, |
|
"loss": 0.0009, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.691, |
|
"grad_norm": 0.0146669652312994, |
|
"learning_rate": 1.545e-05, |
|
"loss": 0.001, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 0.692, |
|
"grad_norm": 0.006643705535680056, |
|
"learning_rate": 1.54e-05, |
|
"loss": 0.0009, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 0.693, |
|
"grad_norm": 0.011501871049404144, |
|
"learning_rate": 1.535e-05, |
|
"loss": 0.0008, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 0.694, |
|
"grad_norm": 0.008170065470039845, |
|
"learning_rate": 1.53e-05, |
|
"loss": 0.0008, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 0.695, |
|
"grad_norm": 0.00737554719671607, |
|
"learning_rate": 1.525e-05, |
|
"loss": 0.0007, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 0.696, |
|
"grad_norm": 0.006846282631158829, |
|
"learning_rate": 1.52e-05, |
|
"loss": 0.0009, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 0.697, |
|
"grad_norm": 0.007784941233694553, |
|
"learning_rate": 1.515e-05, |
|
"loss": 0.0008, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 0.698, |
|
"grad_norm": 0.009864069521427155, |
|
"learning_rate": 1.51e-05, |
|
"loss": 0.0008, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 0.699, |
|
"grad_norm": 0.007372863125056028, |
|
"learning_rate": 1.505e-05, |
|
"loss": 0.0009, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.006507135462015867, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.0008, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.701, |
|
"grad_norm": 0.03093353845179081, |
|
"learning_rate": 1.4950000000000001e-05, |
|
"loss": 0.0014, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 0.702, |
|
"grad_norm": 0.01417300570756197, |
|
"learning_rate": 1.49e-05, |
|
"loss": 0.001, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 0.703, |
|
"grad_norm": 0.010836401022970676, |
|
"learning_rate": 1.485e-05, |
|
"loss": 0.0012, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 0.704, |
|
"grad_norm": 0.01000068336725235, |
|
"learning_rate": 1.48e-05, |
|
"loss": 0.001, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 0.705, |
|
"grad_norm": 0.008654952049255371, |
|
"learning_rate": 1.475e-05, |
|
"loss": 0.0009, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 0.706, |
|
"grad_norm": 0.010761331766843796, |
|
"learning_rate": 1.47e-05, |
|
"loss": 0.001, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 0.707, |
|
"grad_norm": 0.006188638508319855, |
|
"learning_rate": 1.465e-05, |
|
"loss": 0.0008, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 0.708, |
|
"grad_norm": 0.007858789525926113, |
|
"learning_rate": 1.4599999999999999e-05, |
|
"loss": 0.0008, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 0.709, |
|
"grad_norm": 0.02773350477218628, |
|
"learning_rate": 1.455e-05, |
|
"loss": 0.0014, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.012381108477711678, |
|
"learning_rate": 1.45e-05, |
|
"loss": 0.0009, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.711, |
|
"grad_norm": 0.009256324730813503, |
|
"learning_rate": 1.4449999999999999e-05, |
|
"loss": 0.0008, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 0.712, |
|
"grad_norm": 0.007005748804658651, |
|
"learning_rate": 1.44e-05, |
|
"loss": 0.0009, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 0.713, |
|
"grad_norm": 0.0055755749344825745, |
|
"learning_rate": 1.435e-05, |
|
"loss": 0.0007, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 0.714, |
|
"grad_norm": 0.003967254888266325, |
|
"learning_rate": 1.43e-05, |
|
"loss": 0.0008, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 0.715, |
|
"grad_norm": 0.0079165268689394, |
|
"learning_rate": 1.4249999999999999e-05, |
|
"loss": 0.0011, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 0.716, |
|
"grad_norm": 0.004682580940425396, |
|
"learning_rate": 1.42e-05, |
|
"loss": 0.0007, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 0.717, |
|
"grad_norm": 0.008578700013458729, |
|
"learning_rate": 1.415e-05, |
|
"loss": 0.0011, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 0.718, |
|
"grad_norm": 0.006943961605429649, |
|
"learning_rate": 1.4099999999999999e-05, |
|
"loss": 0.0009, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 0.719, |
|
"grad_norm": 0.0072656250558793545, |
|
"learning_rate": 1.4050000000000003e-05, |
|
"loss": 0.0007, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.005639955401420593, |
|
"learning_rate": 1.4000000000000001e-05, |
|
"loss": 0.0007, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.721, |
|
"grad_norm": 0.005733838304877281, |
|
"learning_rate": 1.3950000000000002e-05, |
|
"loss": 0.0008, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 0.722, |
|
"grad_norm": 0.02654002234339714, |
|
"learning_rate": 1.3900000000000002e-05, |
|
"loss": 0.0008, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 0.723, |
|
"grad_norm": 0.007308628410100937, |
|
"learning_rate": 1.3850000000000001e-05, |
|
"loss": 0.0008, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 0.724, |
|
"grad_norm": 0.006939894054085016, |
|
"learning_rate": 1.3800000000000002e-05, |
|
"loss": 0.0007, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 0.725, |
|
"grad_norm": 0.03964811936020851, |
|
"learning_rate": 1.3750000000000002e-05, |
|
"loss": 0.0013, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 0.726, |
|
"grad_norm": 0.014138396829366684, |
|
"learning_rate": 1.3700000000000001e-05, |
|
"loss": 0.001, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 0.727, |
|
"grad_norm": 0.008445181883871555, |
|
"learning_rate": 1.3650000000000001e-05, |
|
"loss": 0.0008, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 0.728, |
|
"grad_norm": 0.01134855579584837, |
|
"learning_rate": 1.3600000000000002e-05, |
|
"loss": 0.0009, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 0.729, |
|
"grad_norm": 0.010982022620737553, |
|
"learning_rate": 1.3550000000000002e-05, |
|
"loss": 0.0015, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.011698734015226364, |
|
"learning_rate": 1.3500000000000001e-05, |
|
"loss": 0.0008, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.731, |
|
"grad_norm": 0.006420729216188192, |
|
"learning_rate": 1.3450000000000002e-05, |
|
"loss": 0.0008, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 0.732, |
|
"grad_norm": 0.006088167428970337, |
|
"learning_rate": 1.3400000000000002e-05, |
|
"loss": 0.0008, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 0.733, |
|
"grad_norm": 0.0071141645312309265, |
|
"learning_rate": 1.3350000000000001e-05, |
|
"loss": 0.0012, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 0.734, |
|
"grad_norm": 0.004975921008735895, |
|
"learning_rate": 1.3300000000000001e-05, |
|
"loss": 0.0006, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 0.735, |
|
"grad_norm": 0.004499469883739948, |
|
"learning_rate": 1.3250000000000002e-05, |
|
"loss": 0.0007, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 0.736, |
|
"grad_norm": 0.009738982655107975, |
|
"learning_rate": 1.32e-05, |
|
"loss": 0.001, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 0.737, |
|
"grad_norm": 0.006863337475806475, |
|
"learning_rate": 1.3150000000000001e-05, |
|
"loss": 0.001, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 0.738, |
|
"grad_norm": 0.008216536603868008, |
|
"learning_rate": 1.3100000000000002e-05, |
|
"loss": 0.0007, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 0.739, |
|
"grad_norm": 0.006803369149565697, |
|
"learning_rate": 1.305e-05, |
|
"loss": 0.0008, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.00551017839461565, |
|
"learning_rate": 1.3000000000000001e-05, |
|
"loss": 0.0008, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.741, |
|
"grad_norm": 0.009463651105761528, |
|
"learning_rate": 1.2950000000000001e-05, |
|
"loss": 0.0008, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 0.742, |
|
"grad_norm": 0.01233983039855957, |
|
"learning_rate": 1.29e-05, |
|
"loss": 0.0019, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 0.743, |
|
"grad_norm": 0.008470877073705196, |
|
"learning_rate": 1.285e-05, |
|
"loss": 0.0009, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 0.744, |
|
"grad_norm": 0.007592742796987295, |
|
"learning_rate": 1.2800000000000001e-05, |
|
"loss": 0.0008, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 0.745, |
|
"grad_norm": 0.03596987947821617, |
|
"learning_rate": 1.2750000000000002e-05, |
|
"loss": 0.001, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 0.746, |
|
"grad_norm": 0.005849502049386501, |
|
"learning_rate": 1.27e-05, |
|
"loss": 0.0008, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 0.747, |
|
"grad_norm": 0.009035659022629261, |
|
"learning_rate": 1.2650000000000001e-05, |
|
"loss": 0.0007, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 0.748, |
|
"grad_norm": 0.010397679172456264, |
|
"learning_rate": 1.2600000000000001e-05, |
|
"loss": 0.0014, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 0.749, |
|
"grad_norm": 0.014514378271996975, |
|
"learning_rate": 1.255e-05, |
|
"loss": 0.0008, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.004837281536310911, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.0006, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.751, |
|
"grad_norm": 0.007720770314335823, |
|
"learning_rate": 1.2450000000000001e-05, |
|
"loss": 0.0006, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 0.752, |
|
"grad_norm": 0.012046804651618004, |
|
"learning_rate": 1.24e-05, |
|
"loss": 0.0011, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 0.753, |
|
"grad_norm": 0.01343387458473444, |
|
"learning_rate": 1.235e-05, |
|
"loss": 0.0007, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 0.754, |
|
"grad_norm": 0.00810600072145462, |
|
"learning_rate": 1.23e-05, |
|
"loss": 0.0007, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 0.755, |
|
"grad_norm": 0.00925883837044239, |
|
"learning_rate": 1.225e-05, |
|
"loss": 0.0006, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 0.756, |
|
"grad_norm": 0.01927885413169861, |
|
"learning_rate": 1.22e-05, |
|
"loss": 0.0014, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 0.757, |
|
"grad_norm": 0.010129665955901146, |
|
"learning_rate": 1.215e-05, |
|
"loss": 0.0006, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 0.758, |
|
"grad_norm": 0.007863885723054409, |
|
"learning_rate": 1.2100000000000001e-05, |
|
"loss": 0.0006, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 0.759, |
|
"grad_norm": 0.005500464700162411, |
|
"learning_rate": 1.205e-05, |
|
"loss": 0.0007, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.0040563903748989105, |
|
"learning_rate": 1.2e-05, |
|
"loss": 0.0006, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.761, |
|
"grad_norm": 0.006361998151987791, |
|
"learning_rate": 1.195e-05, |
|
"loss": 0.0007, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 0.762, |
|
"grad_norm": 0.0136310625821352, |
|
"learning_rate": 1.19e-05, |
|
"loss": 0.0008, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 0.763, |
|
"grad_norm": 0.005384715739637613, |
|
"learning_rate": 1.185e-05, |
|
"loss": 0.0007, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 0.764, |
|
"grad_norm": 0.014707676135003567, |
|
"learning_rate": 1.18e-05, |
|
"loss": 0.0007, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 0.765, |
|
"grad_norm": 0.008092684671282768, |
|
"learning_rate": 1.175e-05, |
|
"loss": 0.0006, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 0.766, |
|
"grad_norm": 0.007185132242739201, |
|
"learning_rate": 1.1700000000000001e-05, |
|
"loss": 0.0006, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 0.767, |
|
"grad_norm": 0.005672789178788662, |
|
"learning_rate": 1.1650000000000002e-05, |
|
"loss": 0.0006, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 0.768, |
|
"grad_norm": 0.05434956029057503, |
|
"learning_rate": 1.16e-05, |
|
"loss": 0.001, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 0.769, |
|
"grad_norm": 0.00933472067117691, |
|
"learning_rate": 1.1550000000000001e-05, |
|
"loss": 0.0007, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.008684621192514896, |
|
"learning_rate": 1.1500000000000002e-05, |
|
"loss": 0.0006, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.771, |
|
"grad_norm": 0.03054739721119404, |
|
"learning_rate": 1.145e-05, |
|
"loss": 0.0006, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 0.772, |
|
"grad_norm": 0.005998207256197929, |
|
"learning_rate": 1.1400000000000001e-05, |
|
"loss": 0.0006, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 0.773, |
|
"grad_norm": 0.006153833121061325, |
|
"learning_rate": 1.1350000000000001e-05, |
|
"loss": 0.0006, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 0.774, |
|
"grad_norm": 0.007491481024771929, |
|
"learning_rate": 1.13e-05, |
|
"loss": 0.0007, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 0.775, |
|
"grad_norm": 0.01078925933688879, |
|
"learning_rate": 1.125e-05, |
|
"loss": 0.0006, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 0.776, |
|
"grad_norm": 0.005885554943233728, |
|
"learning_rate": 1.1200000000000001e-05, |
|
"loss": 0.0006, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 0.777, |
|
"grad_norm": 0.005423078313469887, |
|
"learning_rate": 1.115e-05, |
|
"loss": 0.0007, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 0.778, |
|
"grad_norm": 0.008044522255659103, |
|
"learning_rate": 1.11e-05, |
|
"loss": 0.0006, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 0.779, |
|
"grad_norm": 0.00733207818120718, |
|
"learning_rate": 1.1050000000000001e-05, |
|
"loss": 0.0007, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.0066906120628118515, |
|
"learning_rate": 1.1000000000000001e-05, |
|
"loss": 0.0009, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.781, |
|
"grad_norm": 0.004443836398422718, |
|
"learning_rate": 1.095e-05, |
|
"loss": 0.0006, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 0.782, |
|
"grad_norm": 0.0058379145339131355, |
|
"learning_rate": 1.09e-05, |
|
"loss": 0.0007, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 0.783, |
|
"grad_norm": 0.006808693055063486, |
|
"learning_rate": 1.0850000000000001e-05, |
|
"loss": 0.0006, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 0.784, |
|
"grad_norm": 0.008773542940616608, |
|
"learning_rate": 1.08e-05, |
|
"loss": 0.0006, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 0.785, |
|
"grad_norm": 0.006700740661472082, |
|
"learning_rate": 1.075e-05, |
|
"loss": 0.0006, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 0.786, |
|
"grad_norm": 0.00906393863260746, |
|
"learning_rate": 1.0700000000000001e-05, |
|
"loss": 0.0006, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 0.787, |
|
"grad_norm": 0.0030822190456092358, |
|
"learning_rate": 1.065e-05, |
|
"loss": 0.0005, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 0.788, |
|
"grad_norm": 0.0029632148798555136, |
|
"learning_rate": 1.06e-05, |
|
"loss": 0.0005, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 0.789, |
|
"grad_norm": 0.004798842128366232, |
|
"learning_rate": 1.055e-05, |
|
"loss": 0.0006, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.007376812864094973, |
|
"learning_rate": 1.05e-05, |
|
"loss": 0.0005, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.791, |
|
"grad_norm": 0.009337624534964561, |
|
"learning_rate": 1.045e-05, |
|
"loss": 0.0009, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 0.792, |
|
"grad_norm": 0.012847904115915298, |
|
"learning_rate": 1.04e-05, |
|
"loss": 0.0008, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 0.793, |
|
"grad_norm": 0.005587203428149223, |
|
"learning_rate": 1.035e-05, |
|
"loss": 0.0006, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 0.794, |
|
"grad_norm": 0.008464600890874863, |
|
"learning_rate": 1.03e-05, |
|
"loss": 0.0006, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 0.795, |
|
"grad_norm": 0.2516852617263794, |
|
"learning_rate": 1.025e-05, |
|
"loss": 0.002, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 0.796, |
|
"grad_norm": 0.04664693772792816, |
|
"learning_rate": 1.02e-05, |
|
"loss": 0.002, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 0.797, |
|
"grad_norm": 0.02456306852400303, |
|
"learning_rate": 1.0150000000000001e-05, |
|
"loss": 0.0013, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 0.798, |
|
"grad_norm": 0.011320951394736767, |
|
"learning_rate": 1.0100000000000002e-05, |
|
"loss": 0.0009, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 0.799, |
|
"grad_norm": 0.01860683411359787, |
|
"learning_rate": 1.005e-05, |
|
"loss": 0.0012, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.03227970749139786, |
|
"learning_rate": 1e-05, |
|
"loss": 0.0009, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.801, |
|
"grad_norm": 0.015873363241553307, |
|
"learning_rate": 9.950000000000001e-06, |
|
"loss": 0.0008, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 0.802, |
|
"grad_norm": 0.005454899277538061, |
|
"learning_rate": 9.900000000000002e-06, |
|
"loss": 0.0008, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 0.803, |
|
"grad_norm": 0.007948348298668861, |
|
"learning_rate": 9.85e-06, |
|
"loss": 0.0007, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 0.804, |
|
"grad_norm": 0.013328757137060165, |
|
"learning_rate": 9.800000000000001e-06, |
|
"loss": 0.0006, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 0.805, |
|
"grad_norm": 0.01018743496388197, |
|
"learning_rate": 9.750000000000002e-06, |
|
"loss": 0.0012, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 0.806, |
|
"grad_norm": 0.009421809576451778, |
|
"learning_rate": 9.7e-06, |
|
"loss": 0.0008, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 0.807, |
|
"grad_norm": 0.005202045664191246, |
|
"learning_rate": 9.65e-06, |
|
"loss": 0.0007, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 0.808, |
|
"grad_norm": 0.012956002727150917, |
|
"learning_rate": 9.600000000000001e-06, |
|
"loss": 0.0007, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 0.809, |
|
"grad_norm": 0.006403383333235979, |
|
"learning_rate": 9.55e-06, |
|
"loss": 0.0007, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.027560915797948837, |
|
"learning_rate": 9.5e-06, |
|
"loss": 0.0008, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.811, |
|
"grad_norm": 0.005196988116949797, |
|
"learning_rate": 9.450000000000001e-06, |
|
"loss": 0.0006, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 0.812, |
|
"grad_norm": 0.009510821662843227, |
|
"learning_rate": 9.4e-06, |
|
"loss": 0.0006, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 0.813, |
|
"grad_norm": 0.006430651992559433, |
|
"learning_rate": 9.35e-06, |
|
"loss": 0.0006, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 0.814, |
|
"grad_norm": 0.019426727667450905, |
|
"learning_rate": 9.3e-06, |
|
"loss": 0.0009, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 0.815, |
|
"grad_norm": 0.011564865708351135, |
|
"learning_rate": 9.25e-06, |
|
"loss": 0.0006, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 0.816, |
|
"grad_norm": 0.009036659263074398, |
|
"learning_rate": 9.2e-06, |
|
"loss": 0.0008, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 0.817, |
|
"grad_norm": 0.006685588974505663, |
|
"learning_rate": 9.15e-06, |
|
"loss": 0.0007, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 0.818, |
|
"grad_norm": 0.005980687215924263, |
|
"learning_rate": 9.100000000000001e-06, |
|
"loss": 0.0005, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 0.819, |
|
"grad_norm": 0.0029402158688753843, |
|
"learning_rate": 9.05e-06, |
|
"loss": 0.0005, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.0034720194526016712, |
|
"learning_rate": 9e-06, |
|
"loss": 0.0006, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.821, |
|
"grad_norm": 0.008967465721070766, |
|
"learning_rate": 8.95e-06, |
|
"loss": 0.0009, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 0.822, |
|
"grad_norm": 0.007418784312903881, |
|
"learning_rate": 8.9e-06, |
|
"loss": 0.0007, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 0.823, |
|
"grad_norm": 0.0077253603376448154, |
|
"learning_rate": 8.85e-06, |
|
"loss": 0.0006, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 0.824, |
|
"grad_norm": 0.011202674359083176, |
|
"learning_rate": 8.8e-06, |
|
"loss": 0.0013, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 0.825, |
|
"grad_norm": 0.022354573011398315, |
|
"learning_rate": 8.75e-06, |
|
"loss": 0.0008, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 0.826, |
|
"grad_norm": 0.01750505343079567, |
|
"learning_rate": 8.7e-06, |
|
"loss": 0.0013, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 0.827, |
|
"grad_norm": 0.01153852604329586, |
|
"learning_rate": 8.65e-06, |
|
"loss": 0.0009, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 0.828, |
|
"grad_norm": 0.008752427063882351, |
|
"learning_rate": 8.599999999999999e-06, |
|
"loss": 0.0006, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 0.829, |
|
"grad_norm": 0.007307702675461769, |
|
"learning_rate": 8.550000000000001e-06, |
|
"loss": 0.0007, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.0077101094648242, |
|
"learning_rate": 8.500000000000002e-06, |
|
"loss": 0.0006, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.831, |
|
"grad_norm": 0.006358897779136896, |
|
"learning_rate": 8.45e-06, |
|
"loss": 0.0005, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 0.832, |
|
"grad_norm": 0.003663134528324008, |
|
"learning_rate": 8.400000000000001e-06, |
|
"loss": 0.0006, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 0.833, |
|
"grad_norm": 0.005117372144013643, |
|
"learning_rate": 8.350000000000001e-06, |
|
"loss": 0.0006, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 0.834, |
|
"grad_norm": 0.004245636984705925, |
|
"learning_rate": 8.3e-06, |
|
"loss": 0.0005, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 0.835, |
|
"grad_norm": 0.005357146263122559, |
|
"learning_rate": 8.25e-06, |
|
"loss": 0.0006, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 0.836, |
|
"grad_norm": 0.01055213250219822, |
|
"learning_rate": 8.200000000000001e-06, |
|
"loss": 0.0008, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 0.837, |
|
"grad_norm": 0.01871907152235508, |
|
"learning_rate": 8.15e-06, |
|
"loss": 0.0007, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 0.838, |
|
"grad_norm": 0.013110162690281868, |
|
"learning_rate": 8.1e-06, |
|
"loss": 0.0005, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 0.839, |
|
"grad_norm": 0.005271353758871555, |
|
"learning_rate": 8.050000000000001e-06, |
|
"loss": 0.0007, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.004324494861066341, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 0.0005, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.841, |
|
"grad_norm": 0.0031851409003138542, |
|
"learning_rate": 7.95e-06, |
|
"loss": 0.0006, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 0.842, |
|
"grad_norm": 0.009736557491123676, |
|
"learning_rate": 7.9e-06, |
|
"loss": 0.0006, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 0.843, |
|
"grad_norm": 0.005168536212295294, |
|
"learning_rate": 7.850000000000001e-06, |
|
"loss": 0.0005, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 0.844, |
|
"grad_norm": 0.002579685300588608, |
|
"learning_rate": 7.8e-06, |
|
"loss": 0.0005, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 0.845, |
|
"grad_norm": 0.008710252121090889, |
|
"learning_rate": 7.75e-06, |
|
"loss": 0.0005, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 0.846, |
|
"grad_norm": 0.004952189512550831, |
|
"learning_rate": 7.7e-06, |
|
"loss": 0.0008, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 0.847, |
|
"grad_norm": 0.003375423140823841, |
|
"learning_rate": 7.65e-06, |
|
"loss": 0.0005, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 0.848, |
|
"grad_norm": 0.13184253871440887, |
|
"learning_rate": 7.6e-06, |
|
"loss": 0.0012, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 0.849, |
|
"grad_norm": 0.017549166455864906, |
|
"learning_rate": 7.55e-06, |
|
"loss": 0.0007, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.00852286908775568, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.0006, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.851, |
|
"grad_norm": 0.005547389388084412, |
|
"learning_rate": 7.45e-06, |
|
"loss": 0.0005, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 0.852, |
|
"grad_norm": 0.0061622606590390205, |
|
"learning_rate": 7.4e-06, |
|
"loss": 0.0005, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 0.853, |
|
"grad_norm": 0.005182339809834957, |
|
"learning_rate": 7.35e-06, |
|
"loss": 0.0008, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 0.854, |
|
"grad_norm": 0.005366960074752569, |
|
"learning_rate": 7.2999999999999996e-06, |
|
"loss": 0.0006, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 0.855, |
|
"grad_norm": 0.005542315077036619, |
|
"learning_rate": 7.25e-06, |
|
"loss": 0.0006, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 0.856, |
|
"grad_norm": 0.003940809518098831, |
|
"learning_rate": 7.2e-06, |
|
"loss": 0.0005, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 0.857, |
|
"grad_norm": 0.003730529686436057, |
|
"learning_rate": 7.15e-06, |
|
"loss": 0.0006, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 0.858, |
|
"grad_norm": 0.0033961348235607147, |
|
"learning_rate": 7.1e-06, |
|
"loss": 0.0005, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 0.859, |
|
"grad_norm": 0.004546662792563438, |
|
"learning_rate": 7.049999999999999e-06, |
|
"loss": 0.0006, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.009168008342385292, |
|
"learning_rate": 7.000000000000001e-06, |
|
"loss": 0.0005, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.861, |
|
"grad_norm": 0.008373426273465157, |
|
"learning_rate": 6.950000000000001e-06, |
|
"loss": 0.0008, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 0.862, |
|
"grad_norm": 0.004947313107550144, |
|
"learning_rate": 6.900000000000001e-06, |
|
"loss": 0.0006, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 0.863, |
|
"grad_norm": 0.015127859078347683, |
|
"learning_rate": 6.8500000000000005e-06, |
|
"loss": 0.0006, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 0.864, |
|
"grad_norm": 0.0056435600854456425, |
|
"learning_rate": 6.800000000000001e-06, |
|
"loss": 0.0006, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 0.865, |
|
"grad_norm": 0.004109732341021299, |
|
"learning_rate": 6.750000000000001e-06, |
|
"loss": 0.0005, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 0.866, |
|
"grad_norm": 0.006170314736664295, |
|
"learning_rate": 6.700000000000001e-06, |
|
"loss": 0.0005, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 0.867, |
|
"grad_norm": 0.002802550094202161, |
|
"learning_rate": 6.650000000000001e-06, |
|
"loss": 0.0005, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 0.868, |
|
"grad_norm": 0.0029788350220769644, |
|
"learning_rate": 6.6e-06, |
|
"loss": 0.0004, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 0.869, |
|
"grad_norm": 0.013022363185882568, |
|
"learning_rate": 6.550000000000001e-06, |
|
"loss": 0.0006, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.0036853367928415537, |
|
"learning_rate": 6.5000000000000004e-06, |
|
"loss": 0.0006, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.871, |
|
"grad_norm": 0.002578242914751172, |
|
"learning_rate": 6.45e-06, |
|
"loss": 0.0005, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 0.872, |
|
"grad_norm": 0.0036895396187901497, |
|
"learning_rate": 6.4000000000000006e-06, |
|
"loss": 0.0005, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 0.873, |
|
"grad_norm": 0.006020987406373024, |
|
"learning_rate": 6.35e-06, |
|
"loss": 0.0005, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 0.874, |
|
"grad_norm": 0.006671608425676823, |
|
"learning_rate": 6.300000000000001e-06, |
|
"loss": 0.0006, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 0.875, |
|
"grad_norm": 0.0038102639373391867, |
|
"learning_rate": 6.25e-06, |
|
"loss": 0.0006, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 0.876, |
|
"grad_norm": 0.006786294747143984, |
|
"learning_rate": 6.2e-06, |
|
"loss": 0.0004, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 0.877, |
|
"grad_norm": 0.00381205091252923, |
|
"learning_rate": 6.15e-06, |
|
"loss": 0.0004, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 0.878, |
|
"grad_norm": 0.007368630729615688, |
|
"learning_rate": 6.1e-06, |
|
"loss": 0.0005, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 0.879, |
|
"grad_norm": 0.0035172586794942617, |
|
"learning_rate": 6.0500000000000005e-06, |
|
"loss": 0.0006, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.005555720068514347, |
|
"learning_rate": 6e-06, |
|
"loss": 0.0007, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.881, |
|
"grad_norm": 0.0076825893484056, |
|
"learning_rate": 5.95e-06, |
|
"loss": 0.0005, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 0.882, |
|
"grad_norm": 0.0055446140468120575, |
|
"learning_rate": 5.9e-06, |
|
"loss": 0.0005, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 0.883, |
|
"grad_norm": 0.002265618182718754, |
|
"learning_rate": 5.850000000000001e-06, |
|
"loss": 0.0005, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 0.884, |
|
"grad_norm": 0.003428585361689329, |
|
"learning_rate": 5.8e-06, |
|
"loss": 0.0004, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 0.885, |
|
"grad_norm": 0.0044764927588403225, |
|
"learning_rate": 5.750000000000001e-06, |
|
"loss": 0.0005, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 0.886, |
|
"grad_norm": 0.003201392712071538, |
|
"learning_rate": 5.7000000000000005e-06, |
|
"loss": 0.0005, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 0.887, |
|
"grad_norm": 0.0029762780759483576, |
|
"learning_rate": 5.65e-06, |
|
"loss": 0.0006, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 0.888, |
|
"grad_norm": 0.07450267672538757, |
|
"learning_rate": 5.600000000000001e-06, |
|
"loss": 0.0009, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 0.889, |
|
"grad_norm": 0.006392148323357105, |
|
"learning_rate": 5.55e-06, |
|
"loss": 0.0006, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.0038995451759546995, |
|
"learning_rate": 5.500000000000001e-06, |
|
"loss": 0.0005, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.891, |
|
"grad_norm": 0.0028438065201044083, |
|
"learning_rate": 5.45e-06, |
|
"loss": 0.0004, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 0.892, |
|
"grad_norm": 0.003168331226333976, |
|
"learning_rate": 5.4e-06, |
|
"loss": 0.0004, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 0.893, |
|
"grad_norm": 0.0026163198053836823, |
|
"learning_rate": 5.3500000000000004e-06, |
|
"loss": 0.0004, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 0.894, |
|
"grad_norm": 0.0029086521826684475, |
|
"learning_rate": 5.3e-06, |
|
"loss": 0.0005, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 0.895, |
|
"grad_norm": 0.011433840729296207, |
|
"learning_rate": 5.25e-06, |
|
"loss": 0.0007, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 0.896, |
|
"grad_norm": 0.01782575435936451, |
|
"learning_rate": 5.2e-06, |
|
"loss": 0.0011, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 0.897, |
|
"grad_norm": 0.00613692682236433, |
|
"learning_rate": 5.15e-06, |
|
"loss": 0.0004, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 0.898, |
|
"grad_norm": 0.02408697083592415, |
|
"learning_rate": 5.1e-06, |
|
"loss": 0.0007, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 0.899, |
|
"grad_norm": 0.004028539173305035, |
|
"learning_rate": 5.050000000000001e-06, |
|
"loss": 0.0005, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.0032080088276416063, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0005, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.901, |
|
"grad_norm": 0.0035681568551808596, |
|
"learning_rate": 4.950000000000001e-06, |
|
"loss": 0.0004, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 0.902, |
|
"grad_norm": 0.007591512985527515, |
|
"learning_rate": 4.9000000000000005e-06, |
|
"loss": 0.0005, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 0.903, |
|
"grad_norm": 0.004855870269238949, |
|
"learning_rate": 4.85e-06, |
|
"loss": 0.0004, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 0.904, |
|
"grad_norm": 0.004854188766330481, |
|
"learning_rate": 4.800000000000001e-06, |
|
"loss": 0.0004, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 0.905, |
|
"grad_norm": 0.004117886070162058, |
|
"learning_rate": 4.75e-06, |
|
"loss": 0.0005, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 0.906, |
|
"grad_norm": 0.0045243133790791035, |
|
"learning_rate": 4.7e-06, |
|
"loss": 0.0005, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 0.907, |
|
"grad_norm": 0.001863984507508576, |
|
"learning_rate": 4.65e-06, |
|
"loss": 0.0004, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 0.908, |
|
"grad_norm": 0.002472365740686655, |
|
"learning_rate": 4.6e-06, |
|
"loss": 0.0005, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 0.909, |
|
"grad_norm": 0.0020466954447329044, |
|
"learning_rate": 4.5500000000000005e-06, |
|
"loss": 0.0004, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.004180034622550011, |
|
"learning_rate": 4.5e-06, |
|
"loss": 0.0004, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.911, |
|
"grad_norm": 0.00341266137547791, |
|
"learning_rate": 4.45e-06, |
|
"loss": 0.0006, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 0.912, |
|
"grad_norm": 0.006567875389009714, |
|
"learning_rate": 4.4e-06, |
|
"loss": 0.0004, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 0.913, |
|
"grad_norm": 0.003975498490035534, |
|
"learning_rate": 4.35e-06, |
|
"loss": 0.0006, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 0.914, |
|
"grad_norm": 0.003391894046217203, |
|
"learning_rate": 4.2999999999999995e-06, |
|
"loss": 0.0006, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 0.915, |
|
"grad_norm": 0.005821021273732185, |
|
"learning_rate": 4.250000000000001e-06, |
|
"loss": 0.0004, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 0.916, |
|
"grad_norm": 0.0022448371164500713, |
|
"learning_rate": 4.2000000000000004e-06, |
|
"loss": 0.0004, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 0.917, |
|
"grad_norm": 0.003718709573149681, |
|
"learning_rate": 4.15e-06, |
|
"loss": 0.0004, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 0.918, |
|
"grad_norm": 0.008243223652243614, |
|
"learning_rate": 4.1000000000000006e-06, |
|
"loss": 0.0007, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 0.919, |
|
"grad_norm": 0.010773789137601852, |
|
"learning_rate": 4.05e-06, |
|
"loss": 0.0007, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.006589268799871206, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.0005, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.921, |
|
"grad_norm": 0.0026856744661927223, |
|
"learning_rate": 3.95e-06, |
|
"loss": 0.0004, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 0.922, |
|
"grad_norm": 0.012134186923503876, |
|
"learning_rate": 3.9e-06, |
|
"loss": 0.0005, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 0.923, |
|
"grad_norm": 0.004260225687175989, |
|
"learning_rate": 3.85e-06, |
|
"loss": 0.0005, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 0.924, |
|
"grad_norm": 0.0023803950753062963, |
|
"learning_rate": 3.8e-06, |
|
"loss": 0.0004, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 0.925, |
|
"grad_norm": 0.0037502460181713104, |
|
"learning_rate": 3.75e-06, |
|
"loss": 0.0005, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 0.926, |
|
"grad_norm": 0.0017525887815281749, |
|
"learning_rate": 3.7e-06, |
|
"loss": 0.0003, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 0.927, |
|
"grad_norm": 0.003996537532657385, |
|
"learning_rate": 3.6499999999999998e-06, |
|
"loss": 0.0005, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 0.928, |
|
"grad_norm": 0.009158821776509285, |
|
"learning_rate": 3.6e-06, |
|
"loss": 0.0007, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 0.929, |
|
"grad_norm": 0.003372638253495097, |
|
"learning_rate": 3.55e-06, |
|
"loss": 0.0004, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.0026602360885590315, |
|
"learning_rate": 3.5000000000000004e-06, |
|
"loss": 0.0004, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.931, |
|
"grad_norm": 0.014532738365232944, |
|
"learning_rate": 3.4500000000000004e-06, |
|
"loss": 0.0007, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 0.932, |
|
"grad_norm": 0.002912462456151843, |
|
"learning_rate": 3.4000000000000005e-06, |
|
"loss": 0.0004, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 0.933, |
|
"grad_norm": 0.0052029709331691265, |
|
"learning_rate": 3.3500000000000005e-06, |
|
"loss": 0.0006, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 0.934, |
|
"grad_norm": 0.016220854595303535, |
|
"learning_rate": 3.3e-06, |
|
"loss": 0.0004, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 0.935, |
|
"grad_norm": 0.0030162036418914795, |
|
"learning_rate": 3.2500000000000002e-06, |
|
"loss": 0.0004, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 0.936, |
|
"grad_norm": 0.002491691382601857, |
|
"learning_rate": 3.2000000000000003e-06, |
|
"loss": 0.0004, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 0.937, |
|
"grad_norm": 0.022630969062447548, |
|
"learning_rate": 3.1500000000000003e-06, |
|
"loss": 0.0005, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 0.938, |
|
"grad_norm": 0.005951160565018654, |
|
"learning_rate": 3.1e-06, |
|
"loss": 0.0005, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 0.939, |
|
"grad_norm": 0.0024763622786849737, |
|
"learning_rate": 3.05e-06, |
|
"loss": 0.0005, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.0049979290924966335, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0005, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.941, |
|
"grad_norm": 0.0025999436620622873, |
|
"learning_rate": 2.95e-06, |
|
"loss": 0.0004, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 0.942, |
|
"grad_norm": 0.004584169946610928, |
|
"learning_rate": 2.9e-06, |
|
"loss": 0.0006, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 0.943, |
|
"grad_norm": 0.005211680196225643, |
|
"learning_rate": 2.8500000000000002e-06, |
|
"loss": 0.0005, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 0.944, |
|
"grad_norm": 0.0022507943212985992, |
|
"learning_rate": 2.8000000000000003e-06, |
|
"loss": 0.0004, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 0.945, |
|
"grad_norm": 0.0029024691320955753, |
|
"learning_rate": 2.7500000000000004e-06, |
|
"loss": 0.0004, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 0.946, |
|
"grad_norm": 0.003968573175370693, |
|
"learning_rate": 2.7e-06, |
|
"loss": 0.0004, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 0.947, |
|
"grad_norm": 0.003264777595177293, |
|
"learning_rate": 2.65e-06, |
|
"loss": 0.0005, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 0.948, |
|
"grad_norm": 0.0048127188347280025, |
|
"learning_rate": 2.6e-06, |
|
"loss": 0.0004, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 0.949, |
|
"grad_norm": 0.004405410494655371, |
|
"learning_rate": 2.55e-06, |
|
"loss": 0.0006, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.00462340796366334, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.0004, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.951, |
|
"grad_norm": 0.0021721452940255404, |
|
"learning_rate": 2.4500000000000003e-06, |
|
"loss": 0.0004, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 0.952, |
|
"grad_norm": 0.002355078933760524, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"loss": 0.0006, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 0.953, |
|
"grad_norm": 0.0022414589766412973, |
|
"learning_rate": 2.35e-06, |
|
"loss": 0.0004, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 0.954, |
|
"grad_norm": 0.012005253694951534, |
|
"learning_rate": 2.3e-06, |
|
"loss": 0.0006, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 0.955, |
|
"grad_norm": 0.00513832364231348, |
|
"learning_rate": 2.25e-06, |
|
"loss": 0.0005, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 0.956, |
|
"grad_norm": 0.0027625642251223326, |
|
"learning_rate": 2.2e-06, |
|
"loss": 0.0005, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 0.957, |
|
"grad_norm": 0.008645957335829735, |
|
"learning_rate": 2.1499999999999997e-06, |
|
"loss": 0.0005, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 0.958, |
|
"grad_norm": 0.00188863230869174, |
|
"learning_rate": 2.1000000000000002e-06, |
|
"loss": 0.0004, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 0.959, |
|
"grad_norm": 0.0025561931543052197, |
|
"learning_rate": 2.0500000000000003e-06, |
|
"loss": 0.0004, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.0033618698362261057, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 0.0004, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.961, |
|
"grad_norm": 0.0018735548947006464, |
|
"learning_rate": 1.95e-06, |
|
"loss": 0.0004, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 0.962, |
|
"grad_norm": 0.0019359014695510268, |
|
"learning_rate": 1.9e-06, |
|
"loss": 0.0005, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 0.963, |
|
"grad_norm": 0.005369434133172035, |
|
"learning_rate": 1.85e-06, |
|
"loss": 0.0004, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 0.964, |
|
"grad_norm": 0.0017576682148501277, |
|
"learning_rate": 1.8e-06, |
|
"loss": 0.0004, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 0.965, |
|
"grad_norm": 0.002633103635162115, |
|
"learning_rate": 1.7500000000000002e-06, |
|
"loss": 0.0004, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 0.966, |
|
"grad_norm": 0.007023205049335957, |
|
"learning_rate": 1.7000000000000002e-06, |
|
"loss": 0.0004, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 0.967, |
|
"grad_norm": 0.0026062438264489174, |
|
"learning_rate": 1.65e-06, |
|
"loss": 0.0005, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 0.968, |
|
"grad_norm": 0.0025111304130405188, |
|
"learning_rate": 1.6000000000000001e-06, |
|
"loss": 0.0005, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 0.969, |
|
"grad_norm": 0.0028218806255608797, |
|
"learning_rate": 1.55e-06, |
|
"loss": 0.0004, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.0024802633561193943, |
|
"learning_rate": 1.5e-06, |
|
"loss": 0.0005, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.971, |
|
"grad_norm": 0.002883223118260503, |
|
"learning_rate": 1.45e-06, |
|
"loss": 0.0005, |
|
"step": 9710 |
|
}, |
|
{ |
|
"epoch": 0.972, |
|
"grad_norm": 0.002503247233107686, |
|
"learning_rate": 1.4000000000000001e-06, |
|
"loss": 0.0005, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 0.973, |
|
"grad_norm": 0.002495008986443281, |
|
"learning_rate": 1.35e-06, |
|
"loss": 0.0006, |
|
"step": 9730 |
|
}, |
|
{ |
|
"epoch": 0.974, |
|
"grad_norm": 0.03429775312542915, |
|
"learning_rate": 1.3e-06, |
|
"loss": 0.0006, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 0.975, |
|
"grad_norm": 0.003482217201963067, |
|
"learning_rate": 1.25e-06, |
|
"loss": 0.0004, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 0.976, |
|
"grad_norm": 0.001837963704019785, |
|
"learning_rate": 1.2000000000000002e-06, |
|
"loss": 0.0004, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 0.977, |
|
"grad_norm": 0.0020507893059402704, |
|
"learning_rate": 1.15e-06, |
|
"loss": 0.0004, |
|
"step": 9770 |
|
}, |
|
{ |
|
"epoch": 0.978, |
|
"grad_norm": 0.0022647210862487555, |
|
"learning_rate": 1.1e-06, |
|
"loss": 0.0005, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 0.979, |
|
"grad_norm": 0.0017425378318876028, |
|
"learning_rate": 1.0500000000000001e-06, |
|
"loss": 0.0004, |
|
"step": 9790 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.2319187968969345, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.0021, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.981, |
|
"grad_norm": 0.01799739897251129, |
|
"learning_rate": 9.5e-07, |
|
"loss": 0.0006, |
|
"step": 9810 |
|
}, |
|
{ |
|
"epoch": 0.982, |
|
"grad_norm": 0.007147952448576689, |
|
"learning_rate": 9e-07, |
|
"loss": 0.0005, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 0.983, |
|
"grad_norm": 0.004181794356554747, |
|
"learning_rate": 8.500000000000001e-07, |
|
"loss": 0.0005, |
|
"step": 9830 |
|
}, |
|
{ |
|
"epoch": 0.984, |
|
"grad_norm": 0.00277232495136559, |
|
"learning_rate": 8.000000000000001e-07, |
|
"loss": 0.0004, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 0.985, |
|
"grad_norm": 0.0024797001387923956, |
|
"learning_rate": 7.5e-07, |
|
"loss": 0.0006, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 0.986, |
|
"grad_norm": 0.002748242113739252, |
|
"learning_rate": 7.000000000000001e-07, |
|
"loss": 0.0005, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 0.987, |
|
"grad_norm": 0.002988820429891348, |
|
"learning_rate": 6.5e-07, |
|
"loss": 0.0004, |
|
"step": 9870 |
|
}, |
|
{ |
|
"epoch": 0.988, |
|
"grad_norm": 0.002272873418405652, |
|
"learning_rate": 6.000000000000001e-07, |
|
"loss": 0.0006, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 0.989, |
|
"grad_norm": 0.0028824047185480595, |
|
"learning_rate": 5.5e-07, |
|
"loss": 0.0005, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.013895529322326183, |
|
"learning_rate": 5.000000000000001e-07, |
|
"loss": 0.0005, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.991, |
|
"grad_norm": 0.004210934974253178, |
|
"learning_rate": 4.5e-07, |
|
"loss": 0.0004, |
|
"step": 9910 |
|
}, |
|
{ |
|
"epoch": 0.992, |
|
"grad_norm": 0.0017349456902593374, |
|
"learning_rate": 4.0000000000000003e-07, |
|
"loss": 0.0005, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 0.993, |
|
"grad_norm": 0.0036622195038944483, |
|
"learning_rate": 3.5000000000000004e-07, |
|
"loss": 0.0003, |
|
"step": 9930 |
|
}, |
|
{ |
|
"epoch": 0.994, |
|
"grad_norm": 0.02928483486175537, |
|
"learning_rate": 3.0000000000000004e-07, |
|
"loss": 0.0006, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 0.995, |
|
"grad_norm": 0.004271595273166895, |
|
"learning_rate": 2.5000000000000004e-07, |
|
"loss": 0.0004, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 0.996, |
|
"grad_norm": 0.004935207776725292, |
|
"learning_rate": 2.0000000000000002e-07, |
|
"loss": 0.0004, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 0.997, |
|
"grad_norm": 0.005258087068796158, |
|
"learning_rate": 1.5000000000000002e-07, |
|
"loss": 0.0004, |
|
"step": 9970 |
|
}, |
|
{ |
|
"epoch": 0.998, |
|
"grad_norm": 0.0014150363858789206, |
|
"learning_rate": 1.0000000000000001e-07, |
|
"loss": 0.0004, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 0.999, |
|
"grad_norm": 0.003183445893228054, |
|
"learning_rate": 5.0000000000000004e-08, |
|
"loss": 0.0004, |
|
"step": 9990 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.0063432566821575165, |
|
"learning_rate": 0.0, |
|
"loss": 0.0004, |
|
"step": 10000 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 10000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.6962203336704e+17, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|