|
{ |
|
"best_metric": 1.2792317867279053, |
|
"best_model_checkpoint": "saved_model/c2s_sep_2024/checkpoint-4606", |
|
"epoch": 2.9998371777476254, |
|
"eval_steps": 500, |
|
"global_step": 13818, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": null,
|
"learning_rate": 0.0, |
|
"loss": 77.1448, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 17.278156280517578, |
|
"learning_rate": 2.5e-06, |
|
"loss": 76.5629, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 15.856775283813477, |
|
"learning_rate": 7.000000000000001e-06, |
|
"loss": 75.6974, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 15.606675148010254, |
|
"learning_rate": 1.2e-05, |
|
"loss": 74.9514, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 12.968363761901855, |
|
"learning_rate": 1.7000000000000003e-05, |
|
"loss": 72.4643, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 13.329130172729492, |
|
"learning_rate": 2.2000000000000003e-05, |
|
"loss": 69.0552, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 18.156723022460938, |
|
"learning_rate": 2.7000000000000002e-05, |
|
"loss": 64.2775, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 29.901222229003906, |
|
"learning_rate": 3.2000000000000005e-05, |
|
"loss": 52.1897, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 27.163593292236328, |
|
"learning_rate": 3.65e-05, |
|
"loss": 30.5964, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 13.53585433959961, |
|
"learning_rate": 4.15e-05, |
|
"loss": 12.5007, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 7.8353095054626465, |
|
"learning_rate": 4.6500000000000005e-05, |
|
"loss": 6.4802, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 6.838261127471924, |
|
"learning_rate": 5.1500000000000005e-05, |
|
"loss": 4.7819, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 8.852176666259766, |
|
"learning_rate": 5.65e-05, |
|
"loss": 4.1049, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 7.614436149597168, |
|
"learning_rate": 6.15e-05, |
|
"loss": 3.7732, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 7.756160259246826, |
|
"learning_rate": 6.65e-05, |
|
"loss": 3.6324, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 6.736324310302734, |
|
"learning_rate": 7.15e-05, |
|
"loss": 3.4327, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 8.393209457397461, |
|
"learning_rate": 7.65e-05, |
|
"loss": 3.4096, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 5.403553485870361, |
|
"learning_rate": 8.15e-05, |
|
"loss": 3.2845, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 5.367032051086426, |
|
"learning_rate": 8.65e-05, |
|
"loss": 3.2462, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 7.965042591094971, |
|
"learning_rate": 9.15e-05, |
|
"loss": 3.1463, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 7.074673175811768, |
|
"learning_rate": 9.65e-05, |
|
"loss": 3.1758, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 6.894763946533203, |
|
"learning_rate": 9.999345835150458e-05, |
|
"loss": 3.0311, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 6.925544738769531, |
|
"learning_rate": 9.997165285651984e-05, |
|
"loss": 3.0684, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 5.285668849945068, |
|
"learning_rate": 9.994984736153511e-05, |
|
"loss": 2.9234, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 6.81157922744751, |
|
"learning_rate": 9.992804186655037e-05, |
|
"loss": 2.8664, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 6.883147239685059, |
|
"learning_rate": 9.990623637156565e-05, |
|
"loss": 2.9204, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 5.505452632904053, |
|
"learning_rate": 9.988443087658091e-05, |
|
"loss": 2.8818, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 7.352786064147949, |
|
"learning_rate": 9.986262538159616e-05, |
|
"loss": 2.8999, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 6.875962734222412, |
|
"learning_rate": 9.984081988661144e-05, |
|
"loss": 2.8523, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 5.861810684204102, |
|
"learning_rate": 9.98190143916267e-05, |
|
"loss": 2.8062, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 5.396953582763672, |
|
"learning_rate": 9.979720889664196e-05, |
|
"loss": 2.7625, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 6.168801307678223, |
|
"learning_rate": 9.977540340165722e-05, |
|
"loss": 2.7063, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 4.478597640991211, |
|
"learning_rate": 9.975359790667249e-05, |
|
"loss": 2.6539, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 5.2905731201171875, |
|
"learning_rate": 9.973179241168775e-05, |
|
"loss": 2.7406, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 5.451777935028076, |
|
"learning_rate": 9.970998691670301e-05, |
|
"loss": 2.6599, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 5.45026969909668, |
|
"learning_rate": 9.968818142171828e-05, |
|
"loss": 2.6406, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 4.353079795837402, |
|
"learning_rate": 9.966637592673354e-05, |
|
"loss": 2.5285, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 4.052408218383789, |
|
"learning_rate": 9.96445704317488e-05, |
|
"loss": 2.4025, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 4.303618431091309, |
|
"learning_rate": 9.962276493676407e-05, |
|
"loss": 2.2459, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 3.2505452632904053, |
|
"learning_rate": 9.960095944177933e-05, |
|
"loss": 2.166, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 2.806292772293091, |
|
"learning_rate": 9.95791539467946e-05, |
|
"loss": 2.0462, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 2.9824328422546387, |
|
"learning_rate": 9.955734845180987e-05, |
|
"loss": 1.9315, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 2.7355027198791504, |
|
"learning_rate": 9.953554295682512e-05, |
|
"loss": 1.9072, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 2.385045051574707, |
|
"learning_rate": 9.951373746184038e-05, |
|
"loss": 1.8667, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 2.4067020416259766, |
|
"learning_rate": 9.949193196685566e-05, |
|
"loss": 1.8179, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 2.6805872917175293, |
|
"learning_rate": 9.947012647187092e-05, |
|
"loss": 1.8208, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1.9335626363754272, |
|
"learning_rate": 9.944832097688618e-05, |
|
"loss": 1.8092, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1.7954732179641724, |
|
"learning_rate": 9.942651548190143e-05, |
|
"loss": 1.7698, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 2.2542481422424316, |
|
"learning_rate": 9.940470998691671e-05, |
|
"loss": 1.7359, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 1.8089336156845093, |
|
"learning_rate": 9.938290449193197e-05, |
|
"loss": 1.7195, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 2.3044662475585938, |
|
"learning_rate": 9.936109899694724e-05, |
|
"loss": 1.6901, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 1.8811343908309937, |
|
"learning_rate": 9.93392935019625e-05, |
|
"loss": 1.6757, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 2.8750667572021484, |
|
"learning_rate": 9.931748800697776e-05, |
|
"loss": 1.6902, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 1.8759925365447998, |
|
"learning_rate": 9.929568251199303e-05, |
|
"loss": 1.6519, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 1.7360563278198242, |
|
"learning_rate": 9.927387701700829e-05, |
|
"loss": 1.6381, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 1.9994693994522095, |
|
"learning_rate": 9.925207152202356e-05, |
|
"loss": 1.6527, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 1.803330659866333, |
|
"learning_rate": 9.923026602703881e-05, |
|
"loss": 1.6453, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 1.569846272468567, |
|
"learning_rate": 9.920846053205408e-05, |
|
"loss": 1.6689, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 1.5712964534759521, |
|
"learning_rate": 9.918665503706934e-05, |
|
"loss": 1.6512, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 1.643431544303894, |
|
"learning_rate": 9.916484954208462e-05, |
|
"loss": 1.5994, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 1.619866132736206, |
|
"learning_rate": 9.914304404709988e-05, |
|
"loss": 1.6212, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 1.8739800453186035, |
|
"learning_rate": 9.912123855211514e-05, |
|
"loss": 1.5664, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 1.9525455236434937, |
|
"learning_rate": 9.909943305713039e-05, |
|
"loss": 1.6108, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.5381406545639038, |
|
"learning_rate": 9.907762756214567e-05, |
|
"loss": 1.6004, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.5303971767425537, |
|
"learning_rate": 9.905582206716093e-05, |
|
"loss": 1.581, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.6467609405517578, |
|
"learning_rate": 9.90340165721762e-05, |
|
"loss": 1.5812, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.6094383001327515, |
|
"learning_rate": 9.901221107719146e-05, |
|
"loss": 1.6027, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 1.5612354278564453, |
|
"learning_rate": 9.899040558220672e-05, |
|
"loss": 1.5477, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 1.5925028324127197, |
|
"learning_rate": 9.896860008722198e-05, |
|
"loss": 1.5747, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 1.434138298034668, |
|
"learning_rate": 9.894679459223725e-05, |
|
"loss": 1.5528, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 1.6473920345306396, |
|
"learning_rate": 9.892498909725251e-05, |
|
"loss": 1.622, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 1.599965214729309, |
|
"learning_rate": 9.890318360226777e-05, |
|
"loss": 1.5691, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 1.6525471210479736, |
|
"learning_rate": 9.888137810728304e-05, |
|
"loss": 1.6131, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 1.5170183181762695, |
|
"learning_rate": 9.88595726122983e-05, |
|
"loss": 1.5221, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 1.596643328666687, |
|
"learning_rate": 9.883776711731358e-05, |
|
"loss": 1.545, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 1.5849794149398804, |
|
"learning_rate": 9.881596162232884e-05, |
|
"loss": 1.5654, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 1.4768157005310059, |
|
"learning_rate": 9.879415612734409e-05, |
|
"loss": 1.5345, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 1.5123172998428345, |
|
"learning_rate": 9.877235063235935e-05, |
|
"loss": 1.5236, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 1.5827418565750122, |
|
"learning_rate": 9.875054513737463e-05, |
|
"loss": 1.5174, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 1.8722275495529175, |
|
"learning_rate": 9.872873964238989e-05, |
|
"loss": 1.5256, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 1.6323179006576538, |
|
"learning_rate": 9.870693414740515e-05, |
|
"loss": 1.4835, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.618322491645813, |
|
"learning_rate": 9.868512865242042e-05, |
|
"loss": 1.5214, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.6474233865737915, |
|
"learning_rate": 9.866332315743568e-05, |
|
"loss": 1.4811, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.4305635690689087, |
|
"learning_rate": 9.864151766245094e-05, |
|
"loss": 1.4727, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.6656005382537842, |
|
"learning_rate": 9.86197121674662e-05, |
|
"loss": 1.5373, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.640834927558899, |
|
"learning_rate": 9.859790667248147e-05, |
|
"loss": 1.4811, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 1.474351167678833, |
|
"learning_rate": 9.857610117749673e-05, |
|
"loss": 1.4819, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 1.28626549243927, |
|
"learning_rate": 9.8554295682512e-05, |
|
"loss": 1.5221, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 1.313599944114685, |
|
"learning_rate": 9.853249018752726e-05, |
|
"loss": 1.5221, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 1.609924554824829, |
|
"learning_rate": 9.851068469254252e-05, |
|
"loss": 1.519, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 1.2516050338745117, |
|
"learning_rate": 9.84888791975578e-05, |
|
"loss": 1.4906, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 1.3122848272323608, |
|
"learning_rate": 9.846707370257305e-05, |
|
"loss": 1.5051, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 1.4828795194625854, |
|
"learning_rate": 9.844526820758831e-05, |
|
"loss": 1.5206, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 1.3761475086212158, |
|
"learning_rate": 9.842346271260357e-05, |
|
"loss": 1.503, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 1.4912587404251099, |
|
"learning_rate": 9.840165721761885e-05, |
|
"loss": 1.4932, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 1.2759939432144165, |
|
"learning_rate": 9.837985172263411e-05, |
|
"loss": 1.4843, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 1.6568008661270142, |
|
"learning_rate": 9.835804622764938e-05, |
|
"loss": 1.5046, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 1.4292601346969604, |
|
"learning_rate": 9.833624073266463e-05, |
|
"loss": 1.5249, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 1.4866324663162231, |
|
"learning_rate": 9.83144352376799e-05, |
|
"loss": 1.4959, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 1.429203748703003, |
|
"learning_rate": 9.829262974269517e-05, |
|
"loss": 1.4725, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 1.3150511980056763, |
|
"learning_rate": 9.827082424771043e-05, |
|
"loss": 1.4644, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 1.2386242151260376, |
|
"learning_rate": 9.824901875272569e-05, |
|
"loss": 1.4956, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 1.74444580078125, |
|
"learning_rate": 9.822721325774095e-05, |
|
"loss": 1.4477, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 1.21920907497406, |
|
"learning_rate": 9.820540776275622e-05, |
|
"loss": 1.5053, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 1.172884464263916, |
|
"learning_rate": 9.818360226777148e-05, |
|
"loss": 1.4478, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 1.3462252616882324, |
|
"learning_rate": 9.816179677278676e-05, |
|
"loss": 1.4749, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 1.230682373046875, |
|
"learning_rate": 9.8139991277802e-05, |
|
"loss": 1.4608, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 1.4852972030639648, |
|
"learning_rate": 9.811818578281727e-05, |
|
"loss": 1.5006, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 1.2698734998703003, |
|
"learning_rate": 9.809638028783253e-05, |
|
"loss": 1.4521, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 1.3210391998291016, |
|
"learning_rate": 9.807457479284781e-05, |
|
"loss": 1.4506, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 1.329473853111267, |
|
"learning_rate": 9.805276929786307e-05, |
|
"loss": 1.4587, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 1.185905933380127, |
|
"learning_rate": 9.803096380287832e-05, |
|
"loss": 1.439, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 1.1401315927505493, |
|
"learning_rate": 9.800915830789358e-05, |
|
"loss": 1.4934, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 1.2437337636947632, |
|
"learning_rate": 9.798735281290886e-05, |
|
"loss": 1.4771, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 1.231963872909546, |
|
"learning_rate": 9.796554731792412e-05, |
|
"loss": 1.4428, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 1.274877905845642, |
|
"learning_rate": 9.794374182293939e-05, |
|
"loss": 1.4414, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 1.376755952835083, |
|
"learning_rate": 9.792193632795465e-05, |
|
"loss": 1.4366, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 1.0724767446517944, |
|
"learning_rate": 9.790013083296991e-05, |
|
"loss": 1.4817, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 1.3843764066696167, |
|
"learning_rate": 9.787832533798518e-05, |
|
"loss": 1.4986, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 1.327138900756836, |
|
"learning_rate": 9.785651984300044e-05, |
|
"loss": 1.4484, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 1.3678048849105835, |
|
"learning_rate": 9.78347143480157e-05, |
|
"loss": 1.454, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 1.4238979816436768, |
|
"learning_rate": 9.781290885303097e-05, |
|
"loss": 1.4491, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 1.1681418418884277, |
|
"learning_rate": 9.779110335804623e-05, |
|
"loss": 1.4524, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 1.2097047567367554, |
|
"learning_rate": 9.776929786306149e-05, |
|
"loss": 1.4562, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 1.3048409223556519, |
|
"learning_rate": 9.774749236807677e-05, |
|
"loss": 1.4508, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 1.3852041959762573, |
|
"learning_rate": 9.772568687309203e-05, |
|
"loss": 1.4277, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 1.179715871810913, |
|
"learning_rate": 9.770388137810728e-05, |
|
"loss": 1.415, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 1.1659610271453857, |
|
"learning_rate": 9.768207588312254e-05, |
|
"loss": 1.4528, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 1.334057331085205, |
|
"learning_rate": 9.766027038813782e-05, |
|
"loss": 1.4525, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 1.5751981735229492, |
|
"learning_rate": 9.763846489315308e-05, |
|
"loss": 1.4427, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 1.1843003034591675, |
|
"learning_rate": 9.761665939816835e-05, |
|
"loss": 1.4427, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 1.3135390281677246, |
|
"learning_rate": 9.759485390318361e-05, |
|
"loss": 1.4245, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 1.1618658304214478, |
|
"learning_rate": 9.757304840819887e-05, |
|
"loss": 1.4622, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 1.159295678138733, |
|
"learning_rate": 9.755124291321414e-05, |
|
"loss": 1.4557, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 1.209723949432373, |
|
"learning_rate": 9.75294374182294e-05, |
|
"loss": 1.41, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 1.2520672082901, |
|
"learning_rate": 9.750763192324466e-05, |
|
"loss": 1.4362, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 1.2639249563217163, |
|
"learning_rate": 9.748582642825992e-05, |
|
"loss": 1.4526, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 1.2657458782196045, |
|
"learning_rate": 9.746402093327519e-05, |
|
"loss": 1.4479, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 1.4267339706420898, |
|
"learning_rate": 9.744221543829045e-05, |
|
"loss": 1.4219, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 1.1722772121429443, |
|
"learning_rate": 9.742040994330571e-05, |
|
"loss": 1.448, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 1.1443181037902832, |
|
"learning_rate": 9.739860444832099e-05, |
|
"loss": 1.4193, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 1.2879366874694824, |
|
"learning_rate": 9.737679895333624e-05, |
|
"loss": 1.4196, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 1.2243574857711792, |
|
"learning_rate": 9.73549934583515e-05, |
|
"loss": 1.4296, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 1.2071127891540527, |
|
"learning_rate": 9.733318796336677e-05, |
|
"loss": 1.4194, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 1.1925525665283203, |
|
"learning_rate": 9.731138246838204e-05, |
|
"loss": 1.4243, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 1.2962863445281982, |
|
"learning_rate": 9.72895769733973e-05, |
|
"loss": 1.4371, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 1.0177215337753296, |
|
"learning_rate": 9.726777147841255e-05, |
|
"loss": 1.4237, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 1.4175331592559814, |
|
"learning_rate": 9.724596598342783e-05, |
|
"loss": 1.4107, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 1.0958452224731445, |
|
"learning_rate": 9.72241604884431e-05, |
|
"loss": 1.4176, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 1.1612709760665894, |
|
"learning_rate": 9.720235499345836e-05, |
|
"loss": 1.4051, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 1.0781750679016113, |
|
"learning_rate": 9.718054949847362e-05, |
|
"loss": 1.4179, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 1.1481519937515259, |
|
"learning_rate": 9.715874400348888e-05, |
|
"loss": 1.4247, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 1.155716896057129, |
|
"learning_rate": 9.713693850850415e-05, |
|
"loss": 1.4268, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 1.0442588329315186, |
|
"learning_rate": 9.711513301351941e-05, |
|
"loss": 1.445, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 1.0979626178741455, |
|
"learning_rate": 9.709332751853467e-05, |
|
"loss": 1.4149, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 1.119378685951233, |
|
"learning_rate": 9.707152202354995e-05, |
|
"loss": 1.44, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 1.2214171886444092, |
|
"learning_rate": 9.70497165285652e-05, |
|
"loss": 1.44, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 1.1184163093566895, |
|
"learning_rate": 9.702791103358046e-05, |
|
"loss": 1.3981, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 1.130410075187683, |
|
"learning_rate": 9.700610553859572e-05, |
|
"loss": 1.4296, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 1.1225483417510986, |
|
"learning_rate": 9.6984300043611e-05, |
|
"loss": 1.4153, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 1.0556180477142334, |
|
"learning_rate": 9.696249454862626e-05, |
|
"loss": 1.4219, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 1.2000679969787598, |
|
"learning_rate": 9.694068905364151e-05, |
|
"loss": 1.3892, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 1.0137077569961548, |
|
"learning_rate": 9.691888355865678e-05, |
|
"loss": 1.3976, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 1.0124636888504028, |
|
"learning_rate": 9.689707806367205e-05, |
|
"loss": 1.4129, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 1.0647350549697876, |
|
"learning_rate": 9.687527256868732e-05, |
|
"loss": 1.357, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 1.0684030055999756, |
|
"learning_rate": 9.685346707370258e-05, |
|
"loss": 1.4082, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 1.0580588579177856, |
|
"learning_rate": 9.683166157871784e-05, |
|
"loss": 1.3959, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 1.1602911949157715, |
|
"learning_rate": 9.68098560837331e-05, |
|
"loss": 1.3857, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 1.1642051935195923, |
|
"learning_rate": 9.678805058874837e-05, |
|
"loss": 1.4055, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.0410170555114746, |
|
"learning_rate": 9.676624509376363e-05, |
|
"loss": 1.4071, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.067542314529419, |
|
"learning_rate": 9.674443959877891e-05, |
|
"loss": 1.4093, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.2621368169784546, |
|
"learning_rate": 9.672263410379416e-05, |
|
"loss": 1.3814, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.0956709384918213, |
|
"learning_rate": 9.670082860880942e-05, |
|
"loss": 1.4024, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.1027687788009644, |
|
"learning_rate": 9.667902311382468e-05, |
|
"loss": 1.3544, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.1282079219818115, |
|
"learning_rate": 9.665721761883996e-05, |
|
"loss": 1.3818, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.244485855102539, |
|
"learning_rate": 9.663541212385522e-05, |
|
"loss": 1.4024, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.2329769134521484, |
|
"learning_rate": 9.661360662887047e-05, |
|
"loss": 1.413, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.2671635150909424, |
|
"learning_rate": 9.659180113388574e-05, |
|
"loss": 1.4002, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 1.2992949485778809, |
|
"learning_rate": 9.656999563890101e-05, |
|
"loss": 1.3972, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 1.15711510181427, |
|
"learning_rate": 9.654819014391628e-05, |
|
"loss": 1.3882, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 1.122938632965088, |
|
"learning_rate": 9.652638464893154e-05, |
|
"loss": 1.4222, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 1.151628851890564, |
|
"learning_rate": 9.650457915394679e-05, |
|
"loss": 1.3898, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.0860607624053955, |
|
"learning_rate": 9.648277365896206e-05, |
|
"loss": 1.3745, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.9899650812149048, |
|
"learning_rate": 9.646096816397733e-05, |
|
"loss": 1.3985, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.019313097000122, |
|
"learning_rate": 9.643916266899259e-05, |
|
"loss": 1.4031, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.1719962358474731, |
|
"learning_rate": 9.641735717400785e-05, |
|
"loss": 1.3781, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.117961049079895, |
|
"learning_rate": 9.639555167902312e-05, |
|
"loss": 1.3885, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.3950169086456299, |
|
"learning_rate": 9.637374618403838e-05, |
|
"loss": 1.3746, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.1064496040344238, |
|
"learning_rate": 9.635194068905364e-05, |
|
"loss": 1.3764, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.174922227859497, |
|
"learning_rate": 9.63301351940689e-05, |
|
"loss": 1.42, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.3221770524978638, |
|
"learning_rate": 9.630832969908418e-05, |
|
"loss": 1.3712, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.0039620399475098, |
|
"learning_rate": 9.628652420409943e-05, |
|
"loss": 1.3976, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.9963878393173218, |
|
"learning_rate": 9.62647187091147e-05, |
|
"loss": 1.3977, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.2195067405700684, |
|
"learning_rate": 9.624291321412997e-05, |
|
"loss": 1.3847, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.0968499183654785, |
|
"learning_rate": 9.622110771914523e-05, |
|
"loss": 1.3937, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.992825448513031, |
|
"learning_rate": 9.61993022241605e-05, |
|
"loss": 1.4082, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1.0395129919052124, |
|
"learning_rate": 9.617749672917575e-05, |
|
"loss": 1.3696, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1.030629277229309, |
|
"learning_rate": 9.615569123419102e-05, |
|
"loss": 1.4, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1.0580593347549438, |
|
"learning_rate": 9.613388573920629e-05, |
|
"loss": 1.3461, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1.2588000297546387, |
|
"learning_rate": 9.611208024422155e-05, |
|
"loss": 1.3687, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1.1057671308517456, |
|
"learning_rate": 9.609027474923681e-05, |
|
"loss": 1.3876, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.1952061653137207, |
|
"learning_rate": 9.606846925425208e-05, |
|
"loss": 1.3821, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.105406641960144, |
|
"learning_rate": 9.604666375926734e-05, |
|
"loss": 1.375, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.0594791173934937, |
|
"learning_rate": 9.60248582642826e-05, |
|
"loss": 1.3644, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.055421233177185, |
|
"learning_rate": 9.600305276929787e-05, |
|
"loss": 1.3938, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.2545115947723389, |
|
"learning_rate": 9.598124727431314e-05, |
|
"loss": 1.3709, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.9864488244056702, |
|
"learning_rate": 9.595944177932839e-05, |
|
"loss": 1.3802, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.0537374019622803, |
|
"learning_rate": 9.593763628434365e-05, |
|
"loss": 1.3847, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.0474879741668701, |
|
"learning_rate": 9.591583078935892e-05, |
|
"loss": 1.3616, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.1384907960891724, |
|
"learning_rate": 9.58940252943742e-05, |
|
"loss": 1.3548, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.1582238674163818, |
|
"learning_rate": 9.587221979938946e-05, |
|
"loss": 1.374, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.1610651016235352, |
|
"learning_rate": 9.58504143044047e-05, |
|
"loss": 1.3726, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.0401073694229126, |
|
"learning_rate": 9.582860880941997e-05, |
|
"loss": 1.3617, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.1059417724609375, |
|
"learning_rate": 9.580680331443525e-05, |
|
"loss": 1.3765, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.055931806564331, |
|
"learning_rate": 9.578499781945051e-05, |
|
"loss": 1.377, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.1078617572784424, |
|
"learning_rate": 9.576319232446577e-05, |
|
"loss": 1.3714, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.0788148641586304, |
|
"learning_rate": 9.574138682948104e-05, |
|
"loss": 1.3769, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.1252089738845825, |
|
"learning_rate": 9.57195813344963e-05, |
|
"loss": 1.3583, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.0174541473388672, |
|
"learning_rate": 9.569777583951156e-05, |
|
"loss": 1.3665, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.0689630508422852, |
|
"learning_rate": 9.567597034452682e-05, |
|
"loss": 1.3571, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.1311278343200684, |
|
"learning_rate": 9.565416484954209e-05, |
|
"loss": 1.3475, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.082227349281311, |
|
"learning_rate": 9.563235935455735e-05, |
|
"loss": 1.3952, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.116151213645935, |
|
"learning_rate": 9.561055385957261e-05, |
|
"loss": 1.3644, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.2500598430633545, |
|
"learning_rate": 9.558874836458788e-05, |
|
"loss": 1.3197, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.1783186197280884, |
|
"learning_rate": 9.556694286960315e-05, |
|
"loss": 1.3599, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.964650571346283, |
|
"learning_rate": 9.554513737461842e-05, |
|
"loss": 1.3765, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.1065633296966553, |
|
"learning_rate": 9.552333187963367e-05, |
|
"loss": 1.3605, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.4492055177688599, |
|
"learning_rate": 9.550152638464893e-05, |
|
"loss": 1.3766, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.9989602565765381, |
|
"learning_rate": 9.54797208896642e-05, |
|
"loss": 1.3821, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.2991678714752197, |
|
"learning_rate": 9.545791539467947e-05, |
|
"loss": 1.3418, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.1501140594482422, |
|
"learning_rate": 9.543610989969473e-05, |
|
"loss": 1.3627, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.9911489486694336, |
|
"learning_rate": 9.541430440470998e-05, |
|
"loss": 1.3413, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.1046435832977295, |
|
"learning_rate": 9.539249890972526e-05, |
|
"loss": 1.3494, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.0511558055877686, |
|
"learning_rate": 9.537069341474052e-05, |
|
"loss": 1.3347, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.1485401391983032, |
|
"learning_rate": 9.534888791975578e-05, |
|
"loss": 1.3833, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.2908611297607422, |
|
"learning_rate": 9.532708242477105e-05, |
|
"loss": 1.3958, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.0557186603546143, |
|
"learning_rate": 9.530527692978631e-05, |
|
"loss": 1.3455, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.0551774501800537, |
|
"learning_rate": 9.528347143480157e-05, |
|
"loss": 1.3366, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.0171273946762085, |
|
"learning_rate": 9.526166593981684e-05, |
|
"loss": 1.3488, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.3464566469192505, |
|
"learning_rate": 9.523986044483211e-05, |
|
"loss": 1.3274, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.1853042840957642, |
|
"learning_rate": 9.521805494984737e-05, |
|
"loss": 1.3553, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.2067043781280518, |
|
"learning_rate": 9.519624945486262e-05, |
|
"loss": 1.358, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.0003714561462402, |
|
"learning_rate": 9.517444395987789e-05, |
|
"loss": 1.3768, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.036536455154419, |
|
"learning_rate": 9.515263846489316e-05, |
|
"loss": 1.325, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.2333424091339111, |
|
"learning_rate": 9.513083296990843e-05, |
|
"loss": 1.3179, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.5285654067993164, |
|
"learning_rate": 9.510902747492369e-05, |
|
"loss": 1.3847, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.9648860096931458, |
|
"learning_rate": 9.508722197993894e-05, |
|
"loss": 1.3624, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.0200995206832886, |
|
"learning_rate": 9.506541648495422e-05, |
|
"loss": 1.3604, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.0368491411209106, |
|
"learning_rate": 9.504361098996948e-05, |
|
"loss": 1.3778, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.9241245985031128, |
|
"learning_rate": 9.502180549498474e-05, |
|
"loss": 1.3751, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.0286930799484253, |
|
"learning_rate": 9.5e-05, |
|
"loss": 1.3429, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.262276530265808, |
|
"learning_rate": 9.497819450501527e-05, |
|
"loss": 1.3533, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.1345752477645874, |
|
"learning_rate": 9.495638901003053e-05, |
|
"loss": 1.3502, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.025653600692749, |
|
"learning_rate": 9.49345835150458e-05, |
|
"loss": 1.3674, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.0177459716796875, |
|
"learning_rate": 9.491277802006106e-05, |
|
"loss": 1.356, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.1438894271850586, |
|
"learning_rate": 9.489097252507632e-05, |
|
"loss": 1.3488, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.133844017982483, |
|
"learning_rate": 9.486916703009158e-05, |
|
"loss": 1.3649, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.0228559970855713, |
|
"learning_rate": 9.484736153510685e-05, |
|
"loss": 1.3207, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.037307858467102, |
|
"learning_rate": 9.482555604012211e-05, |
|
"loss": 1.3517, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.123706340789795, |
|
"learning_rate": 9.480375054513739e-05, |
|
"loss": 1.371, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.0684685707092285, |
|
"learning_rate": 9.478194505015265e-05, |
|
"loss": 1.335, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.9726172089576721, |
|
"learning_rate": 9.47601395551679e-05, |
|
"loss": 1.3588, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.8923851251602173, |
|
"learning_rate": 9.473833406018318e-05, |
|
"loss": 1.3269, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.1655867099761963, |
|
"learning_rate": 9.471652856519844e-05, |
|
"loss": 1.3267, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.9636451005935669, |
|
"learning_rate": 9.46947230702137e-05, |
|
"loss": 1.3545, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.1559605598449707, |
|
"learning_rate": 9.467291757522896e-05, |
|
"loss": 1.3276, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.1488990783691406, |
|
"learning_rate": 9.465111208024423e-05, |
|
"loss": 1.3312, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.0026187896728516, |
|
"learning_rate": 9.462930658525949e-05, |
|
"loss": 1.3574, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.0129337310791016, |
|
"learning_rate": 9.460750109027475e-05, |
|
"loss": 1.3524, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.1561243534088135, |
|
"learning_rate": 9.458569559529002e-05, |
|
"loss": 1.3467, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.0476332902908325, |
|
"learning_rate": 9.456389010030528e-05, |
|
"loss": 1.3552, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.0199921131134033, |
|
"learning_rate": 9.454208460532054e-05, |
|
"loss": 1.3313, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.2194985151290894, |
|
"learning_rate": 9.45202791103358e-05, |
|
"loss": 1.3134, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.9112060070037842, |
|
"learning_rate": 9.449847361535107e-05, |
|
"loss": 1.3581, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.085046648979187, |
|
"learning_rate": 9.447666812036635e-05, |
|
"loss": 1.3344, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.0680015087127686, |
|
"learning_rate": 9.445486262538161e-05, |
|
"loss": 1.3227, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.9969652891159058, |
|
"learning_rate": 9.443305713039686e-05, |
|
"loss": 1.3324, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.0868465900421143, |
|
"learning_rate": 9.441125163541212e-05, |
|
"loss": 1.3261, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.0380125045776367, |
|
"learning_rate": 9.43894461404274e-05, |
|
"loss": 1.3378, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.9851745367050171, |
|
"learning_rate": 9.436764064544266e-05, |
|
"loss": 1.3171, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.9909139275550842, |
|
"learning_rate": 9.434583515045792e-05, |
|
"loss": 1.3073, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.0225688219070435, |
|
"learning_rate": 9.432402965547317e-05, |
|
"loss": 1.3119, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.018894910812378, |
|
"learning_rate": 9.430222416048845e-05, |
|
"loss": 1.3337, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.0594004392623901, |
|
"learning_rate": 9.428041866550371e-05, |
|
"loss": 1.309, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.0812976360321045, |
|
"learning_rate": 9.425861317051898e-05, |
|
"loss": 1.3403, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.9586821794509888, |
|
"learning_rate": 9.423680767553424e-05, |
|
"loss": 1.3413, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.9033297896385193, |
|
"learning_rate": 9.42150021805495e-05, |
|
"loss": 1.3361, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.976488471031189, |
|
"learning_rate": 9.419319668556476e-05, |
|
"loss": 1.3467, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.9687233567237854, |
|
"learning_rate": 9.417139119058003e-05, |
|
"loss": 1.3089, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.9967139959335327, |
|
"learning_rate": 9.41495856955953e-05, |
|
"loss": 1.3241, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.9404115676879883, |
|
"learning_rate": 9.412778020061055e-05, |
|
"loss": 1.3489, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.038221001625061, |
|
"learning_rate": 9.410597470562582e-05, |
|
"loss": 1.3405, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.9442505240440369, |
|
"learning_rate": 9.408416921064108e-05, |
|
"loss": 1.3733, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.8614059090614319, |
|
"learning_rate": 9.406236371565636e-05, |
|
"loss": 1.3369, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.0159504413604736, |
|
"learning_rate": 9.404055822067162e-05, |
|
"loss": 1.3473, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.9344844222068787, |
|
"learning_rate": 9.401875272568688e-05, |
|
"loss": 1.3191, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.9241899251937866, |
|
"learning_rate": 9.399694723070213e-05, |
|
"loss": 1.3074, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.0132297277450562, |
|
"learning_rate": 9.397514173571741e-05, |
|
"loss": 1.3345, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1.035719633102417, |
|
"learning_rate": 9.395333624073267e-05, |
|
"loss": 1.3241, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1.0716739892959595, |
|
"learning_rate": 9.393153074574793e-05, |
|
"loss": 1.3342, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1.05617094039917, |
|
"learning_rate": 9.39097252507632e-05, |
|
"loss": 1.3174, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1.0201910734176636, |
|
"learning_rate": 9.388791975577846e-05, |
|
"loss": 1.3427, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.9820442199707031, |
|
"learning_rate": 9.386611426079372e-05, |
|
"loss": 1.3187, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.9873951077461243, |
|
"learning_rate": 9.384430876580899e-05, |
|
"loss": 1.311, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.0694694519042969, |
|
"learning_rate": 9.382250327082425e-05, |
|
"loss": 1.3409, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.9933134317398071, |
|
"learning_rate": 9.380069777583951e-05, |
|
"loss": 1.3202, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.0120593309402466, |
|
"learning_rate": 9.377889228085478e-05, |
|
"loss": 1.3243, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1.0012543201446533, |
|
"learning_rate": 9.375708678587004e-05, |
|
"loss": 1.3205, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.9940156936645508, |
|
"learning_rate": 9.373528129088532e-05, |
|
"loss": 1.3319, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.9410566687583923, |
|
"learning_rate": 9.371347579590058e-05, |
|
"loss": 1.3377, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1.0209511518478394, |
|
"learning_rate": 9.369167030091584e-05, |
|
"loss": 1.3226, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1.0901682376861572, |
|
"learning_rate": 9.366986480593109e-05, |
|
"loss": 1.3054, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1.1590335369110107, |
|
"learning_rate": 9.364805931094637e-05, |
|
"loss": 1.333, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.9248669147491455, |
|
"learning_rate": 9.362625381596163e-05, |
|
"loss": 1.3195, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.9178153276443481, |
|
"learning_rate": 9.36044483209769e-05, |
|
"loss": 1.3411, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.8997146487236023, |
|
"learning_rate": 9.358264282599216e-05, |
|
"loss": 1.3238, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.872699499130249, |
|
"learning_rate": 9.356083733100742e-05, |
|
"loss": 1.311, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 1.0057190656661987, |
|
"learning_rate": 9.353903183602268e-05, |
|
"loss": 1.3419, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.9421138763427734, |
|
"learning_rate": 9.351722634103795e-05, |
|
"loss": 1.3326, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 1.072662353515625, |
|
"learning_rate": 9.349542084605321e-05, |
|
"loss": 1.3101, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.9273852109909058, |
|
"learning_rate": 9.347361535106847e-05, |
|
"loss": 1.2917, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 1.056483507156372, |
|
"learning_rate": 9.345180985608373e-05, |
|
"loss": 1.3145, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 1.0562832355499268, |
|
"learning_rate": 9.3430004361099e-05, |
|
"loss": 1.3236, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.9665394425392151, |
|
"learning_rate": 9.340819886611426e-05, |
|
"loss": 1.3311, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 1.1284903287887573, |
|
"learning_rate": 9.338639337112954e-05, |
|
"loss": 1.2955, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.8982547521591187, |
|
"learning_rate": 9.336458787614479e-05, |
|
"loss": 1.3064, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.9506440162658691, |
|
"learning_rate": 9.334278238116005e-05, |
|
"loss": 1.2924, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.990853488445282, |
|
"learning_rate": 9.332097688617531e-05, |
|
"loss": 1.3153, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 1.048412561416626, |
|
"learning_rate": 9.329917139119059e-05, |
|
"loss": 1.3151, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.9810274243354797, |
|
"learning_rate": 9.327736589620585e-05, |
|
"loss": 1.3106, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 1.2232158184051514, |
|
"learning_rate": 9.325556040122112e-05, |
|
"loss": 1.3269, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.9797046780586243, |
|
"learning_rate": 9.323375490623638e-05, |
|
"loss": 1.3237, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.9088875651359558, |
|
"learning_rate": 9.321194941125164e-05, |
|
"loss": 1.328, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.9865596294403076, |
|
"learning_rate": 9.31901439162669e-05, |
|
"loss": 1.3245, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.890883207321167, |
|
"learning_rate": 9.316833842128217e-05, |
|
"loss": 1.3078, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 1.2496368885040283, |
|
"learning_rate": 9.314653292629743e-05, |
|
"loss": 1.2926, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.9493234753608704, |
|
"learning_rate": 9.31247274313127e-05, |
|
"loss": 1.3267, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.9854113459587097, |
|
"learning_rate": 9.310292193632796e-05, |
|
"loss": 1.315, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.9487243294715881, |
|
"learning_rate": 9.308111644134322e-05, |
|
"loss": 1.3089, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 1.0045417547225952, |
|
"learning_rate": 9.30593109463585e-05, |
|
"loss": 1.3007, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.9876412749290466, |
|
"learning_rate": 9.303750545137375e-05, |
|
"loss": 1.3276, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.9821478724479675, |
|
"learning_rate": 9.301569995638901e-05, |
|
"loss": 1.3276, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 1.0079724788665771, |
|
"learning_rate": 9.299389446140427e-05, |
|
"loss": 1.3379, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 1.0058810710906982, |
|
"learning_rate": 9.297208896641955e-05, |
|
"loss": 1.309, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.9457936882972717, |
|
"learning_rate": 9.295028347143481e-05, |
|
"loss": 1.3301, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 1.0582879781723022, |
|
"learning_rate": 9.292847797645007e-05, |
|
"loss": 1.3075, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 1.0312747955322266, |
|
"learning_rate": 9.290667248146532e-05, |
|
"loss": 1.3102, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 1.3287076950073242, |
|
"learning_rate": 9.28848669864806e-05, |
|
"loss": 1.2828, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 1.0003306865692139, |
|
"learning_rate": 9.286306149149586e-05, |
|
"loss": 1.3158, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.9804103970527649, |
|
"learning_rate": 9.284125599651113e-05, |
|
"loss": 1.3429, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.9052048325538635, |
|
"learning_rate": 9.281945050152639e-05, |
|
"loss": 1.3248, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.9492114782333374, |
|
"learning_rate": 9.279764500654165e-05, |
|
"loss": 1.3173, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.9319648742675781, |
|
"learning_rate": 9.277583951155692e-05, |
|
"loss": 1.3188, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.9741306900978088, |
|
"learning_rate": 9.275403401657218e-05, |
|
"loss": 1.3263, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.9644444584846497, |
|
"learning_rate": 9.273222852158746e-05, |
|
"loss": 1.3089, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.972549319267273, |
|
"learning_rate": 9.27104230266027e-05, |
|
"loss": 1.3047, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 1.1472231149673462, |
|
"learning_rate": 9.268861753161797e-05, |
|
"loss": 1.3414, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 1.212759256362915, |
|
"learning_rate": 9.266681203663323e-05, |
|
"loss": 1.2955, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.9833585023880005, |
|
"learning_rate": 9.264500654164851e-05, |
|
"loss": 1.3101, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 1.0089327096939087, |
|
"learning_rate": 9.262320104666377e-05, |
|
"loss": 1.3078, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 1.026849627494812, |
|
"learning_rate": 9.260139555167902e-05, |
|
"loss": 1.3062, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.8988268375396729, |
|
"learning_rate": 9.257959005669428e-05, |
|
"loss": 1.2961, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 1.0766083002090454, |
|
"learning_rate": 9.255778456170956e-05, |
|
"loss": 1.302, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 1.111632227897644, |
|
"learning_rate": 9.253597906672482e-05, |
|
"loss": 1.3179, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.9569946527481079, |
|
"learning_rate": 9.251417357174009e-05, |
|
"loss": 1.3392, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.9719332456588745, |
|
"learning_rate": 9.249236807675535e-05, |
|
"loss": 1.3019, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.9521161317825317, |
|
"learning_rate": 9.247056258177061e-05, |
|
"loss": 1.3226, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 1.1349732875823975, |
|
"learning_rate": 9.244875708678587e-05, |
|
"loss": 1.3184, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 1.0802345275878906, |
|
"learning_rate": 9.242695159180114e-05, |
|
"loss": 1.3236, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 1.0327568054199219, |
|
"learning_rate": 9.24051460968164e-05, |
|
"loss": 1.3285, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 1.064948320388794, |
|
"learning_rate": 9.238334060183166e-05, |
|
"loss": 1.3158, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.88676518201828, |
|
"learning_rate": 9.236153510684693e-05, |
|
"loss": 1.3066, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.942152202129364, |
|
"learning_rate": 9.233972961186219e-05, |
|
"loss": 1.332, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.9341984987258911, |
|
"learning_rate": 9.231792411687745e-05, |
|
"loss": 1.3147, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.8915871381759644, |
|
"learning_rate": 9.229611862189273e-05, |
|
"loss": 1.3071, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.9265626668930054, |
|
"learning_rate": 9.227431312690798e-05, |
|
"loss": 1.3083, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.9003929495811462, |
|
"learning_rate": 9.225250763192324e-05, |
|
"loss": 1.3101, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 1.004757285118103, |
|
"learning_rate": 9.223070213693852e-05, |
|
"loss": 1.3324, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.9720560908317566, |
|
"learning_rate": 9.220889664195378e-05, |
|
"loss": 1.3074, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 1.0125725269317627, |
|
"learning_rate": 9.218709114696904e-05, |
|
"loss": 1.295, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.9948697686195374, |
|
"learning_rate": 9.21652856519843e-05, |
|
"loss": 1.3072, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.8904112577438354, |
|
"learning_rate": 9.214348015699957e-05, |
|
"loss": 1.2879, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.9827283620834351, |
|
"learning_rate": 9.212167466201483e-05, |
|
"loss": 1.2859, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.9134978652000427, |
|
"learning_rate": 9.20998691670301e-05, |
|
"loss": 1.2996, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.9517325162887573, |
|
"learning_rate": 9.207806367204536e-05, |
|
"loss": 1.2764, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.9537093043327332, |
|
"learning_rate": 9.205625817706062e-05, |
|
"loss": 1.3112, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 1.011399269104004, |
|
"learning_rate": 9.203445268207589e-05, |
|
"loss": 1.3008, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 1.0325734615325928, |
|
"learning_rate": 9.201264718709115e-05, |
|
"loss": 1.3032, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.9590222239494324, |
|
"learning_rate": 9.199084169210641e-05, |
|
"loss": 1.3002, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.984958827495575, |
|
"learning_rate": 9.196903619712169e-05, |
|
"loss": 1.3011, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 1.1154364347457886, |
|
"learning_rate": 9.194723070213694e-05, |
|
"loss": 1.3065, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 1.0203578472137451, |
|
"learning_rate": 9.19254252071522e-05, |
|
"loss": 1.3193, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 1.0204946994781494, |
|
"learning_rate": 9.190361971216746e-05, |
|
"loss": 1.3048, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.9758703708648682, |
|
"learning_rate": 9.188181421718274e-05, |
|
"loss": 1.2933, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 1.0854405164718628, |
|
"learning_rate": 9.1860008722198e-05, |
|
"loss": 1.2947, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 1.0030591487884521, |
|
"learning_rate": 9.183820322721325e-05, |
|
"loss": 1.2882, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.9652947187423706, |
|
"learning_rate": 9.181639773222852e-05, |
|
"loss": 1.2779, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 1.0450283288955688, |
|
"learning_rate": 9.179459223724379e-05, |
|
"loss": 1.2807, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 1.0894801616668701, |
|
"learning_rate": 9.177278674225906e-05, |
|
"loss": 1.3072, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 1.0392231941223145, |
|
"learning_rate": 9.175098124727432e-05, |
|
"loss": 1.3119, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.9792558550834656, |
|
"learning_rate": 9.172917575228958e-05, |
|
"loss": 1.3062, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 1.015689492225647, |
|
"learning_rate": 9.170737025730485e-05, |
|
"loss": 1.3075, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 1.0359702110290527, |
|
"learning_rate": 9.168556476232011e-05, |
|
"loss": 1.3022, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.9113004803657532, |
|
"learning_rate": 9.166375926733537e-05, |
|
"loss": 1.3298, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 1.0571136474609375, |
|
"learning_rate": 9.164195377235065e-05, |
|
"loss": 1.2898, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.9297426342964172, |
|
"learning_rate": 9.16201482773659e-05, |
|
"loss": 1.2895, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 1.0925400257110596, |
|
"learning_rate": 9.159834278238116e-05, |
|
"loss": 1.2998, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.9070808291435242, |
|
"learning_rate": 9.157653728739642e-05, |
|
"loss": 1.2998, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 1.1315734386444092, |
|
"learning_rate": 9.15547317924117e-05, |
|
"loss": 1.2867, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 1.0597316026687622, |
|
"learning_rate": 9.153292629742696e-05, |
|
"loss": 1.2931, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.9442005157470703, |
|
"learning_rate": 9.151112080244221e-05, |
|
"loss": 1.2805, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 1.3041001558303833, |
|
"learning_rate": 9.148931530745748e-05, |
|
"loss": 1.2934, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.9306684136390686, |
|
"learning_rate": 9.146750981247275e-05, |
|
"loss": 1.2933, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.9480651021003723, |
|
"learning_rate": 9.144570431748802e-05, |
|
"loss": 1.3147, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.98679119348526, |
|
"learning_rate": 9.142389882250328e-05, |
|
"loss": 1.3063, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.9486891627311707, |
|
"learning_rate": 9.140209332751853e-05, |
|
"loss": 1.2644, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.9325621724128723, |
|
"learning_rate": 9.13802878325338e-05, |
|
"loss": 1.2718, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.9871125221252441, |
|
"learning_rate": 9.135848233754907e-05, |
|
"loss": 1.2943, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.9043755531311035, |
|
"learning_rate": 9.133667684256433e-05, |
|
"loss": 1.3015, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.9878096580505371, |
|
"learning_rate": 9.13148713475796e-05, |
|
"loss": 1.2524, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.925841748714447, |
|
"learning_rate": 9.129306585259486e-05, |
|
"loss": 1.2881, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.8888818025588989, |
|
"learning_rate": 9.127126035761012e-05, |
|
"loss": 1.3057, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 1.1273852586746216, |
|
"learning_rate": 9.124945486262538e-05, |
|
"loss": 1.3068, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 1.078979253768921, |
|
"learning_rate": 9.122764936764066e-05, |
|
"loss": 1.311, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 1.139224648475647, |
|
"learning_rate": 9.120584387265592e-05, |
|
"loss": 1.2961, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.9568941593170166, |
|
"learning_rate": 9.118403837767117e-05, |
|
"loss": 1.3335, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.8990288972854614, |
|
"learning_rate": 9.116223288268643e-05, |
|
"loss": 1.2983, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 1.0404481887817383, |
|
"learning_rate": 9.114042738770171e-05, |
|
"loss": 1.2867, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.970191240310669, |
|
"learning_rate": 9.111862189271697e-05, |
|
"loss": 1.2923, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.9285945296287537, |
|
"learning_rate": 9.109681639773224e-05, |
|
"loss": 1.296, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 1.0113970041275024, |
|
"learning_rate": 9.107501090274749e-05, |
|
"loss": 1.2861, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 1.0101959705352783, |
|
"learning_rate": 9.105320540776276e-05, |
|
"loss": 1.2958, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.9014917612075806, |
|
"learning_rate": 9.103139991277803e-05, |
|
"loss": 1.2735, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 1.4451045989990234, |
|
"learning_rate": 9.100959441779329e-05, |
|
"loss": 1.3111, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.9970597624778748, |
|
"learning_rate": 9.098778892280855e-05, |
|
"loss": 1.2725, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.9795159101486206, |
|
"learning_rate": 9.096598342782382e-05, |
|
"loss": 1.286, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 1.1754708290100098, |
|
"learning_rate": 9.094417793283908e-05, |
|
"loss": 1.2903, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 1.02108895778656, |
|
"learning_rate": 9.092237243785434e-05, |
|
"loss": 1.2865, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.9269696474075317, |
|
"learning_rate": 9.09005669428696e-05, |
|
"loss": 1.3163, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.9824286103248596, |
|
"learning_rate": 9.087876144788488e-05, |
|
"loss": 1.2713, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 1.2137070894241333, |
|
"learning_rate": 9.085695595290013e-05, |
|
"loss": 1.313, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 1.0218490362167358, |
|
"learning_rate": 9.08351504579154e-05, |
|
"loss": 1.2864, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 1.0295207500457764, |
|
"learning_rate": 9.081334496293066e-05, |
|
"loss": 1.2974, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 1.0075607299804688, |
|
"learning_rate": 9.079153946794593e-05, |
|
"loss": 1.3011, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.889430820941925, |
|
"learning_rate": 9.07697339729612e-05, |
|
"loss": 1.3112, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.9565015435218811, |
|
"learning_rate": 9.074792847797645e-05, |
|
"loss": 1.3019, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 1.0241695642471313, |
|
"learning_rate": 9.072612298299172e-05, |
|
"loss": 1.2878, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.9693965315818787, |
|
"learning_rate": 9.070431748800699e-05, |
|
"loss": 1.3009, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.8897150754928589, |
|
"learning_rate": 9.068251199302225e-05, |
|
"loss": 1.2757, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 1.1614912748336792, |
|
"learning_rate": 9.066070649803751e-05, |
|
"loss": 1.2923, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.8832863569259644, |
|
"learning_rate": 9.063890100305277e-05, |
|
"loss": 1.3098, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.9805281162261963, |
|
"learning_rate": 9.061709550806804e-05, |
|
"loss": 1.2958, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 1.0199958086013794, |
|
"learning_rate": 9.05952900130833e-05, |
|
"loss": 1.2824, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.8528922200202942, |
|
"learning_rate": 9.057348451809856e-05, |
|
"loss": 1.2993, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.9288610816001892, |
|
"learning_rate": 9.055167902311384e-05, |
|
"loss": 1.2758, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.8977848887443542, |
|
"learning_rate": 9.052987352812909e-05, |
|
"loss": 1.2789, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.8637726902961731, |
|
"learning_rate": 9.050806803314435e-05, |
|
"loss": 1.2734, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.9056828022003174, |
|
"learning_rate": 9.048626253815962e-05, |
|
"loss": 1.272, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.9080044627189636, |
|
"learning_rate": 9.046445704317489e-05, |
|
"loss": 1.264, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.886441707611084, |
|
"learning_rate": 9.044265154819016e-05, |
|
"loss": 1.2752, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.02278470993042, |
|
"learning_rate": 9.04208460532054e-05, |
|
"loss": 1.2819, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 1.2792317867279053, |
|
"eval_runtime": 1502.3325, |
|
"eval_samples_per_second": 257.499, |
|
"eval_steps_per_second": 4.024, |
|
"step": 4606 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.08243727684021, |
|
"learning_rate": 9.039904055822067e-05, |
|
"loss": 1.3113, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.073258399963379, |
|
"learning_rate": 9.037723506323594e-05, |
|
"loss": 1.3031, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 0.9962953329086304, |
|
"learning_rate": 9.035542956825121e-05, |
|
"loss": 1.2904, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 0.9397081136703491, |
|
"learning_rate": 9.033362407326647e-05, |
|
"loss": 1.2672, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 0.9223260879516602, |
|
"learning_rate": 9.031181857828172e-05, |
|
"loss": 1.2898, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 1.0643510818481445, |
|
"learning_rate": 9.0290013083297e-05, |
|
"loss": 1.2831, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 0.9219188094139099, |
|
"learning_rate": 9.026820758831226e-05, |
|
"loss": 1.2651, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 0.9872779250144958, |
|
"learning_rate": 9.024640209332752e-05, |
|
"loss": 1.2695, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 0.9516711235046387, |
|
"learning_rate": 9.022459659834279e-05, |
|
"loss": 1.2662, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 0.9385516047477722, |
|
"learning_rate": 9.020279110335805e-05, |
|
"loss": 1.2744, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 1.0308866500854492, |
|
"learning_rate": 9.018098560837331e-05, |
|
"loss": 1.2718, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 0.9456400871276855, |
|
"learning_rate": 9.015918011338857e-05, |
|
"loss": 1.2494, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 1.1350531578063965, |
|
"learning_rate": 9.013737461840385e-05, |
|
"loss": 1.2607, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 0.9552891254425049, |
|
"learning_rate": 9.011556912341911e-05, |
|
"loss": 1.2563, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 0.9082231521606445, |
|
"learning_rate": 9.009376362843436e-05, |
|
"loss": 1.268, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 1.0419315099716187, |
|
"learning_rate": 9.007195813344963e-05, |
|
"loss": 1.3033, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 0.827100396156311, |
|
"learning_rate": 9.00501526384649e-05, |
|
"loss": 1.2636, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 1.0661678314208984, |
|
"learning_rate": 9.002834714348017e-05, |
|
"loss": 1.2487, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 0.9938476085662842, |
|
"learning_rate": 9.000654164849543e-05, |
|
"loss": 1.2729, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 1.1281195878982544, |
|
"learning_rate": 8.998473615351068e-05, |
|
"loss": 1.2391, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 1.1780451536178589, |
|
"learning_rate": 8.996293065852596e-05, |
|
"loss": 1.2985, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 1.0872817039489746, |
|
"learning_rate": 8.994112516354122e-05, |
|
"loss": 1.2615, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 0.9712433815002441, |
|
"learning_rate": 8.991931966855648e-05, |
|
"loss": 1.2694, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 1.2177668809890747, |
|
"learning_rate": 8.989751417357174e-05, |
|
"loss": 1.2726, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 0.9332715272903442, |
|
"learning_rate": 8.987570867858701e-05, |
|
"loss": 1.2703, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 0.9567763209342957, |
|
"learning_rate": 8.985390318360227e-05, |
|
"loss": 1.2711, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 0.9975143074989319, |
|
"learning_rate": 8.983209768861753e-05, |
|
"loss": 1.2947, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 1.0711029767990112, |
|
"learning_rate": 8.98102921936328e-05, |
|
"loss": 1.2723, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 0.9394287467002869, |
|
"learning_rate": 8.978848669864807e-05, |
|
"loss": 1.2709, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 1.0839319229125977, |
|
"learning_rate": 8.976668120366332e-05, |
|
"loss": 1.2892, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 1.024117112159729, |
|
"learning_rate": 8.974487570867859e-05, |
|
"loss": 1.2627, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 0.9055659174919128, |
|
"learning_rate": 8.972307021369386e-05, |
|
"loss": 1.2754, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 0.9383713603019714, |
|
"learning_rate": 8.970126471870913e-05, |
|
"loss": 1.2713, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 1.087470293045044, |
|
"learning_rate": 8.967945922372439e-05, |
|
"loss": 1.27, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 0.9602554440498352, |
|
"learning_rate": 8.965765372873964e-05, |
|
"loss": 1.2829, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 0.9457790851593018, |
|
"learning_rate": 8.963584823375491e-05, |
|
"loss": 1.2757, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 0.8682853579521179, |
|
"learning_rate": 8.961404273877018e-05, |
|
"loss": 1.2662, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 1.0000272989273071, |
|
"learning_rate": 8.959223724378544e-05, |
|
"loss": 1.2616, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 1.0122287273406982, |
|
"learning_rate": 8.95704317488007e-05, |
|
"loss": 1.287, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 0.9552735090255737, |
|
"learning_rate": 8.954862625381597e-05, |
|
"loss": 1.2782, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 0.9103166460990906, |
|
"learning_rate": 8.952682075883123e-05, |
|
"loss": 1.2388, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 1.0033226013183594, |
|
"learning_rate": 8.950501526384649e-05, |
|
"loss": 1.2762, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 0.9572534561157227, |
|
"learning_rate": 8.948320976886176e-05, |
|
"loss": 1.2801, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 0.9460912942886353, |
|
"learning_rate": 8.946140427387702e-05, |
|
"loss": 1.2651, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 1.0236018896102905, |
|
"learning_rate": 8.943959877889228e-05, |
|
"loss": 1.2602, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 1.0384821891784668, |
|
"learning_rate": 8.941779328390754e-05, |
|
"loss": 1.3027, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 0.9547539949417114, |
|
"learning_rate": 8.939598778892281e-05, |
|
"loss": 1.2969, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 0.9478334784507751, |
|
"learning_rate": 8.937418229393808e-05, |
|
"loss": 1.2829, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 1.0621150732040405, |
|
"learning_rate": 8.935237679895335e-05, |
|
"loss": 1.2601, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 0.9307476282119751, |
|
"learning_rate": 8.93305713039686e-05, |
|
"loss": 1.2656, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 1.0189131498336792, |
|
"learning_rate": 8.930876580898386e-05, |
|
"loss": 1.2646, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 1.1185131072998047, |
|
"learning_rate": 8.928696031399914e-05, |
|
"loss": 1.2785, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 0.9753584265708923, |
|
"learning_rate": 8.92651548190144e-05, |
|
"loss": 1.2511, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 1.0418280363082886, |
|
"learning_rate": 8.924334932402966e-05, |
|
"loss": 1.2537, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 0.9717410802841187, |
|
"learning_rate": 8.922154382904493e-05, |
|
"loss": 1.2687, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 0.988318681716919, |
|
"learning_rate": 8.919973833406019e-05, |
|
"loss": 1.2599, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 0.9211105108261108, |
|
"learning_rate": 8.917793283907545e-05, |
|
"loss": 1.2646, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 0.9481471180915833, |
|
"learning_rate": 8.915612734409071e-05, |
|
"loss": 1.271, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 0.8939971923828125, |
|
"learning_rate": 8.913432184910598e-05, |
|
"loss": 1.2865, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 0.9412124156951904, |
|
"learning_rate": 8.911251635412124e-05, |
|
"loss": 1.279, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 0.9381204843521118, |
|
"learning_rate": 8.90907108591365e-05, |
|
"loss": 1.2813, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 0.9502457976341248, |
|
"learning_rate": 8.906890536415177e-05, |
|
"loss": 1.2829, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 1.0576632022857666, |
|
"learning_rate": 8.904709986916704e-05, |
|
"loss": 1.2708, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 1.0302668809890747, |
|
"learning_rate": 8.902529437418229e-05, |
|
"loss": 1.2893, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 0.9892765283584595, |
|
"learning_rate": 8.900348887919756e-05, |
|
"loss": 1.2691, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 1.0383532047271729, |
|
"learning_rate": 8.898168338421282e-05, |
|
"loss": 1.2539, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 0.9894425868988037, |
|
"learning_rate": 8.89598778892281e-05, |
|
"loss": 1.2838, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 1.0066653490066528, |
|
"learning_rate": 8.893807239424336e-05, |
|
"loss": 1.2606, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 1.0619821548461914, |
|
"learning_rate": 8.891626689925862e-05, |
|
"loss": 1.2724, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 0.9619722962379456, |
|
"learning_rate": 8.889446140427387e-05, |
|
"loss": 1.2783, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 0.8887227177619934, |
|
"learning_rate": 8.887265590928915e-05, |
|
"loss": 1.264, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 1.0262665748596191, |
|
"learning_rate": 8.885085041430441e-05, |
|
"loss": 1.2482, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 1.016381859779358, |
|
"learning_rate": 8.882904491931967e-05, |
|
"loss": 1.2523, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 0.9932143092155457, |
|
"learning_rate": 8.880723942433494e-05, |
|
"loss": 1.2516, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 0.9815816283226013, |
|
"learning_rate": 8.87854339293502e-05, |
|
"loss": 1.2574, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 1.0072325468063354, |
|
"learning_rate": 8.876362843436546e-05, |
|
"loss": 1.2688, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 0.9834664463996887, |
|
"learning_rate": 8.874182293938073e-05, |
|
"loss": 1.2632, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 1.0800156593322754, |
|
"learning_rate": 8.8720017444396e-05, |
|
"loss": 1.2767, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 0.9449285268783569, |
|
"learning_rate": 8.869821194941125e-05, |
|
"loss": 1.2667, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 1.1136956214904785, |
|
"learning_rate": 8.867640645442652e-05, |
|
"loss": 1.2506, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 0.9061567783355713, |
|
"learning_rate": 8.865460095944178e-05, |
|
"loss": 1.2658, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 1.00759756565094, |
|
"learning_rate": 8.863279546445705e-05, |
|
"loss": 1.285, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 1.0507421493530273, |
|
"learning_rate": 8.861098996947232e-05, |
|
"loss": 1.277, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 1.0796302556991577, |
|
"learning_rate": 8.858918447448758e-05, |
|
"loss": 1.2604, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 1.0264052152633667, |
|
"learning_rate": 8.856737897950283e-05, |
|
"loss": 1.2747, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"grad_norm": 0.9274656176567078, |
|
"learning_rate": 8.854557348451811e-05, |
|
"loss": 1.2617, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"grad_norm": 1.0233980417251587, |
|
"learning_rate": 8.852376798953337e-05, |
|
"loss": 1.2787, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"grad_norm": 0.9718747138977051, |
|
"learning_rate": 8.850196249454863e-05, |
|
"loss": 1.2511, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"grad_norm": 1.0765981674194336, |
|
"learning_rate": 8.84801569995639e-05, |
|
"loss": 1.2794, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"grad_norm": 1.048608660697937, |
|
"learning_rate": 8.845835150457916e-05, |
|
"loss": 1.2597, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 0.9524050354957581, |
|
"learning_rate": 8.843654600959442e-05, |
|
"loss": 1.246, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 0.9819397926330566, |
|
"learning_rate": 8.841474051460969e-05, |
|
"loss": 1.2732, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 0.914893388748169, |
|
"learning_rate": 8.839293501962495e-05, |
|
"loss": 1.2694, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 0.9561071395874023, |
|
"learning_rate": 8.837112952464021e-05, |
|
"loss": 1.2642, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 0.9841814637184143, |
|
"learning_rate": 8.834932402965547e-05, |
|
"loss": 1.2684, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 0.931611955165863, |
|
"learning_rate": 8.832751853467074e-05, |
|
"loss": 1.2751, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 1.0068223476409912, |
|
"learning_rate": 8.8305713039686e-05, |
|
"loss": 1.2589, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 1.088884711265564, |
|
"learning_rate": 8.828390754470128e-05, |
|
"loss": 1.2606, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 0.9682032465934753, |
|
"learning_rate": 8.826210204971653e-05, |
|
"loss": 1.2467, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"grad_norm": 1.0218122005462646, |
|
"learning_rate": 8.824029655473179e-05, |
|
"loss": 1.2684, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"grad_norm": 0.9690065979957581, |
|
"learning_rate": 8.821849105974707e-05, |
|
"loss": 1.2906, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"grad_norm": 0.9736804366111755, |
|
"learning_rate": 8.819668556476233e-05, |
|
"loss": 1.2682, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"grad_norm": 1.0571842193603516, |
|
"learning_rate": 8.817488006977759e-05, |
|
"loss": 1.247, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"grad_norm": 1.1925692558288574, |
|
"learning_rate": 8.815307457479286e-05, |
|
"loss": 1.28, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"grad_norm": 0.8674301505088806, |
|
"learning_rate": 8.813126907980812e-05, |
|
"loss": 1.2699, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"grad_norm": 1.030501127243042, |
|
"learning_rate": 8.810946358482338e-05, |
|
"loss": 1.2455, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"grad_norm": 1.0425055027008057, |
|
"learning_rate": 8.808765808983864e-05, |
|
"loss": 1.2802, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"grad_norm": 0.9576709866523743, |
|
"learning_rate": 8.806585259485391e-05, |
|
"loss": 1.2584, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 0.9852989912033081, |
|
"learning_rate": 8.804404709986917e-05, |
|
"loss": 1.2707, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 1.0519157648086548, |
|
"learning_rate": 8.802224160488443e-05, |
|
"loss": 1.2647, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 1.1391375064849854, |
|
"learning_rate": 8.80004361098997e-05, |
|
"loss": 1.2459, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 1.295246958732605, |
|
"learning_rate": 8.797863061491496e-05, |
|
"loss": 1.2708, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 0.9388042688369751, |
|
"learning_rate": 8.795682511993024e-05, |
|
"loss": 1.2761, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 0.8345937728881836, |
|
"learning_rate": 8.793501962494549e-05, |
|
"loss": 1.2641, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 0.9559466242790222, |
|
"learning_rate": 8.791321412996075e-05, |
|
"loss": 1.2608, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 0.9135338068008423, |
|
"learning_rate": 8.789140863497601e-05, |
|
"loss": 1.245, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 1.0820287466049194, |
|
"learning_rate": 8.786960313999129e-05, |
|
"loss": 1.2549, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 1.05925714969635, |
|
"learning_rate": 8.784779764500655e-05, |
|
"loss": 1.2493, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"grad_norm": 1.0629942417144775, |
|
"learning_rate": 8.782599215002181e-05, |
|
"loss": 1.2803, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"grad_norm": 1.021894097328186, |
|
"learning_rate": 8.780418665503706e-05, |
|
"loss": 1.264, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"grad_norm": 0.9319231510162354, |
|
"learning_rate": 8.778238116005234e-05, |
|
"loss": 1.2757, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"grad_norm": 0.9403659701347351, |
|
"learning_rate": 8.77605756650676e-05, |
|
"loss": 1.2601, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"grad_norm": 1.0411070585250854, |
|
"learning_rate": 8.773877017008287e-05, |
|
"loss": 1.2747, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"grad_norm": 0.9437740445137024, |
|
"learning_rate": 8.771696467509813e-05, |
|
"loss": 1.2771, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"grad_norm": 1.0971676111221313, |
|
"learning_rate": 8.769515918011339e-05, |
|
"loss": 1.2631, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"grad_norm": 1.0248700380325317, |
|
"learning_rate": 8.767335368512866e-05, |
|
"loss": 1.255, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"grad_norm": 1.1890584230422974, |
|
"learning_rate": 8.765154819014392e-05, |
|
"loss": 1.265, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 1.1310992240905762, |
|
"learning_rate": 8.76297426951592e-05, |
|
"loss": 1.2786, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 0.95496666431427, |
|
"learning_rate": 8.760793720017444e-05, |
|
"loss": 1.2534, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 1.0427186489105225, |
|
"learning_rate": 8.758613170518971e-05, |
|
"loss": 1.2767, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 0.879298985004425, |
|
"learning_rate": 8.756432621020497e-05, |
|
"loss": 1.2453, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"grad_norm": 0.9911447167396545, |
|
"learning_rate": 8.754252071522025e-05, |
|
"loss": 1.248, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"grad_norm": 0.9124498963356018, |
|
"learning_rate": 8.752071522023551e-05, |
|
"loss": 1.2588, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"grad_norm": 0.9397348761558533, |
|
"learning_rate": 8.749890972525076e-05, |
|
"loss": 1.2822, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"grad_norm": 1.0716569423675537, |
|
"learning_rate": 8.747710423026602e-05, |
|
"loss": 1.2483, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"grad_norm": 0.8869634866714478, |
|
"learning_rate": 8.74552987352813e-05, |
|
"loss": 1.2752, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 0.9538241028785706, |
|
"learning_rate": 8.743349324029656e-05, |
|
"loss": 1.2627, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 0.9991753697395325, |
|
"learning_rate": 8.741168774531183e-05, |
|
"loss": 1.2718, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 1.0785272121429443, |
|
"learning_rate": 8.738988225032709e-05, |
|
"loss": 1.2826, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 1.002681851387024, |
|
"learning_rate": 8.736807675534235e-05, |
|
"loss": 1.2659, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 0.9270432591438293, |
|
"learning_rate": 8.734627126035761e-05, |
|
"loss": 1.2493, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"grad_norm": 1.143751621246338, |
|
"learning_rate": 8.732446576537288e-05, |
|
"loss": 1.2965, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"grad_norm": 0.9666625261306763, |
|
"learning_rate": 8.730266027038814e-05, |
|
"loss": 1.2553, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"grad_norm": 0.9400457739830017, |
|
"learning_rate": 8.72808547754034e-05, |
|
"loss": 1.2657, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"grad_norm": 0.9232240319252014, |
|
"learning_rate": 8.725904928041867e-05, |
|
"loss": 1.2494, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 0.9295173287391663, |
|
"learning_rate": 8.723724378543393e-05, |
|
"loss": 1.2496, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 1.293441653251648, |
|
"learning_rate": 8.72154382904492e-05, |
|
"loss": 1.2578, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 0.9575563669204712, |
|
"learning_rate": 8.719363279546447e-05, |
|
"loss": 1.2323, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 1.0204386711120605, |
|
"learning_rate": 8.717182730047972e-05, |
|
"loss": 1.2652, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 0.9446994066238403, |
|
"learning_rate": 8.715002180549498e-05, |
|
"loss": 1.2568, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"grad_norm": 1.0751984119415283, |
|
"learning_rate": 8.712821631051026e-05, |
|
"loss": 1.2806, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"grad_norm": 0.9466795921325684, |
|
"learning_rate": 8.710641081552552e-05, |
|
"loss": 1.2416, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"grad_norm": 1.1114068031311035, |
|
"learning_rate": 8.708460532054078e-05, |
|
"loss": 1.2405, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"grad_norm": 0.9612728953361511, |
|
"learning_rate": 8.706279982555605e-05, |
|
"loss": 1.2655, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"grad_norm": 0.9728400707244873, |
|
"learning_rate": 8.704099433057131e-05, |
|
"loss": 1.2654, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"grad_norm": 1.0217069387435913, |
|
"learning_rate": 8.701918883558657e-05, |
|
"loss": 1.2804, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"grad_norm": 0.9358672499656677, |
|
"learning_rate": 8.699738334060184e-05, |
|
"loss": 1.282, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"grad_norm": 0.875811755657196, |
|
"learning_rate": 8.69755778456171e-05, |
|
"loss": 1.2974, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"grad_norm": 0.9315816760063171, |
|
"learning_rate": 8.695377235063236e-05, |
|
"loss": 1.2515, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 0.9914236664772034, |
|
"learning_rate": 8.693196685564763e-05, |
|
"loss": 1.2438, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 0.9291836023330688, |
|
"learning_rate": 8.691016136066289e-05, |
|
"loss": 1.2794, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 1.036189317703247, |
|
"learning_rate": 8.688835586567815e-05, |
|
"loss": 1.2497, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 1.1179789304733276, |
|
"learning_rate": 8.686655037069343e-05, |
|
"loss": 1.2627, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 1.0586695671081543, |
|
"learning_rate": 8.684474487570868e-05, |
|
"loss": 1.2611, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"grad_norm": 0.9113835692405701, |
|
"learning_rate": 8.682293938072394e-05, |
|
"loss": 1.2671, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"grad_norm": 0.911665141582489, |
|
"learning_rate": 8.68011338857392e-05, |
|
"loss": 1.2425, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"grad_norm": 1.016471266746521, |
|
"learning_rate": 8.677932839075448e-05, |
|
"loss": 1.2672, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"grad_norm": 1.0666197538375854, |
|
"learning_rate": 8.675752289576974e-05, |
|
"loss": 1.2647, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 1.042350172996521, |
|
"learning_rate": 8.673571740078499e-05, |
|
"loss": 1.2211, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 0.9714857339859009, |
|
"learning_rate": 8.671391190580027e-05, |
|
"loss": 1.2698, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 0.9044662714004517, |
|
"learning_rate": 8.669210641081553e-05, |
|
"loss": 1.2753, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 0.8921557664871216, |
|
"learning_rate": 8.66703009158308e-05, |
|
"loss": 1.2528, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 0.9644028544425964, |
|
"learning_rate": 8.664849542084606e-05, |
|
"loss": 1.2642, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 1.0202399492263794, |
|
"learning_rate": 8.662668992586132e-05, |
|
"loss": 1.2473, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 1.0238714218139648, |
|
"learning_rate": 8.660488443087658e-05, |
|
"loss": 1.256, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 1.1190308332443237, |
|
"learning_rate": 8.658307893589185e-05, |
|
"loss": 1.2579, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 0.9763012528419495, |
|
"learning_rate": 8.656127344090711e-05, |
|
"loss": 1.2607, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"grad_norm": 0.9133914709091187, |
|
"learning_rate": 8.653946794592239e-05, |
|
"loss": 1.2685, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"grad_norm": 0.9674580693244934, |
|
"learning_rate": 8.651766245093764e-05, |
|
"loss": 1.2533, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"grad_norm": 1.1029064655303955, |
|
"learning_rate": 8.64958569559529e-05, |
|
"loss": 1.2487, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"grad_norm": 0.9458103775978088, |
|
"learning_rate": 8.647405146096816e-05, |
|
"loss": 1.2677, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"grad_norm": 1.1092442274093628, |
|
"learning_rate": 8.645224596598344e-05, |
|
"loss": 1.2624, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 1.1490038633346558, |
|
"learning_rate": 8.64304404709987e-05, |
|
"loss": 1.2566, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 0.9747464060783386, |
|
"learning_rate": 8.640863497601395e-05, |
|
"loss": 1.2571, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 1.1297920942306519, |
|
"learning_rate": 8.638682948102921e-05, |
|
"loss": 1.2327, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 0.9675096869468689, |
|
"learning_rate": 8.636502398604449e-05, |
|
"loss": 1.2327, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 0.9282464385032654, |
|
"learning_rate": 8.634321849105975e-05, |
|
"loss": 1.2323, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 1.011017918586731, |
|
"learning_rate": 8.632141299607502e-05, |
|
"loss": 1.2429, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 1.02436363697052, |
|
"learning_rate": 8.629960750109028e-05, |
|
"loss": 1.2382, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 1.0600727796554565, |
|
"learning_rate": 8.627780200610554e-05, |
|
"loss": 1.2689, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 0.9400041103363037, |
|
"learning_rate": 8.62559965111208e-05, |
|
"loss": 1.2804, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"grad_norm": 1.156300663948059, |
|
"learning_rate": 8.623419101613607e-05, |
|
"loss": 1.2596, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"grad_norm": 0.9240378141403198, |
|
"learning_rate": 8.621238552115133e-05, |
|
"loss": 1.24, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"grad_norm": 0.8798494338989258, |
|
"learning_rate": 8.61905800261666e-05, |
|
"loss": 1.2526, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"grad_norm": 0.9512797594070435, |
|
"learning_rate": 8.616877453118186e-05, |
|
"loss": 1.2602, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"grad_norm": 0.9985531568527222, |
|
"learning_rate": 8.614696903619712e-05, |
|
"loss": 1.2616, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"grad_norm": 1.134756088256836, |
|
"learning_rate": 8.61251635412124e-05, |
|
"loss": 1.2688, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"grad_norm": 0.9372296333312988, |
|
"learning_rate": 8.610335804622766e-05, |
|
"loss": 1.2538, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"grad_norm": 1.011887788772583, |
|
"learning_rate": 8.608155255124291e-05, |
|
"loss": 1.246, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"grad_norm": 0.9553661346435547, |
|
"learning_rate": 8.605974705625817e-05, |
|
"loss": 1.2502, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 0.9924313426017761, |
|
"learning_rate": 8.603794156127345e-05, |
|
"loss": 1.2362, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 1.05217707157135, |
|
"learning_rate": 8.601613606628871e-05, |
|
"loss": 1.2655, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 1.0302504301071167, |
|
"learning_rate": 8.599433057130398e-05, |
|
"loss": 1.2699, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 1.043373942375183, |
|
"learning_rate": 8.597252507631923e-05, |
|
"loss": 1.2532, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 0.9535781145095825, |
|
"learning_rate": 8.59507195813345e-05, |
|
"loss": 1.2586, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 1.1055347919464111, |
|
"learning_rate": 8.592891408634977e-05, |
|
"loss": 1.2632, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 1.0888850688934326, |
|
"learning_rate": 8.590710859136503e-05, |
|
"loss": 1.2497, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 0.9970211386680603, |
|
"learning_rate": 8.588530309638029e-05, |
|
"loss": 1.2869, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 1.0836609601974487, |
|
"learning_rate": 8.586349760139555e-05, |
|
"loss": 1.2321, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 0.9511786103248596, |
|
"learning_rate": 8.584169210641082e-05, |
|
"loss": 1.2562, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 1.088644027709961, |
|
"learning_rate": 8.581988661142608e-05, |
|
"loss": 1.2418, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 1.0465929508209229, |
|
"learning_rate": 8.579808111644134e-05, |
|
"loss": 1.2608, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 1.12638521194458, |
|
"learning_rate": 8.577627562145662e-05, |
|
"loss": 1.2725, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 1.171322226524353, |
|
"learning_rate": 8.575447012647187e-05, |
|
"loss": 1.265, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"grad_norm": 0.926113486289978, |
|
"learning_rate": 8.573266463148713e-05, |
|
"loss": 1.2559, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"grad_norm": 0.9716551899909973, |
|
"learning_rate": 8.57108591365024e-05, |
|
"loss": 1.2568, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"grad_norm": 1.0213953256607056, |
|
"learning_rate": 8.568905364151767e-05, |
|
"loss": 1.2649, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"grad_norm": 0.9643402099609375, |
|
"learning_rate": 8.566724814653294e-05, |
|
"loss": 1.2433, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"grad_norm": 1.0367106199264526, |
|
"learning_rate": 8.564544265154819e-05, |
|
"loss": 1.2356, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"grad_norm": 0.9655973315238953, |
|
"learning_rate": 8.562363715656346e-05, |
|
"loss": 1.2439, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"grad_norm": 1.0422053337097168, |
|
"learning_rate": 8.560183166157872e-05, |
|
"loss": 1.2528, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"grad_norm": 0.9676966071128845, |
|
"learning_rate": 8.558002616659399e-05, |
|
"loss": 1.2577, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"grad_norm": 0.9732950329780579, |
|
"learning_rate": 8.555822067160925e-05, |
|
"loss": 1.2513, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"grad_norm": 1.0636634826660156, |
|
"learning_rate": 8.553641517662451e-05, |
|
"loss": 1.2694, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"grad_norm": 0.9392173290252686, |
|
"learning_rate": 8.551460968163978e-05, |
|
"loss": 1.2478, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"grad_norm": 0.9402878880500793, |
|
"learning_rate": 8.549280418665504e-05, |
|
"loss": 1.2528, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"grad_norm": 1.0256085395812988, |
|
"learning_rate": 8.54709986916703e-05, |
|
"loss": 1.2704, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"grad_norm": 1.0600332021713257, |
|
"learning_rate": 8.544919319668558e-05, |
|
"loss": 1.2338, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 1.0218205451965332, |
|
"learning_rate": 8.542738770170083e-05, |
|
"loss": 1.2839, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 0.8786155581474304, |
|
"learning_rate": 8.540558220671609e-05, |
|
"loss": 1.248, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 0.9721015095710754, |
|
"learning_rate": 8.538377671173136e-05, |
|
"loss": 1.2734, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 0.9734498858451843, |
|
"learning_rate": 8.536197121674663e-05, |
|
"loss": 1.2454, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 0.9616742730140686, |
|
"learning_rate": 8.53401657217619e-05, |
|
"loss": 1.2565, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"grad_norm": 1.153671383857727, |
|
"learning_rate": 8.531836022677714e-05, |
|
"loss": 1.2549, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"grad_norm": 0.9344118237495422, |
|
"learning_rate": 8.529655473179241e-05, |
|
"loss": 1.2431, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"grad_norm": 1.0228878259658813, |
|
"learning_rate": 8.527474923680768e-05, |
|
"loss": 1.2276, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"grad_norm": 1.088304042816162, |
|
"learning_rate": 8.525294374182295e-05, |
|
"loss": 1.2423, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 0.9886937737464905, |
|
"learning_rate": 8.523113824683821e-05, |
|
"loss": 1.2693, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 0.8818524479866028, |
|
"learning_rate": 8.520933275185346e-05, |
|
"loss": 1.2424, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 0.9912683963775635, |
|
"learning_rate": 8.518752725686874e-05, |
|
"loss": 1.2522, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 0.9952061176300049, |
|
"learning_rate": 8.5165721761884e-05, |
|
"loss": 1.2519, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 1.035301923751831, |
|
"learning_rate": 8.514391626689926e-05, |
|
"loss": 1.2501, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"grad_norm": 1.0349431037902832, |
|
"learning_rate": 8.512211077191452e-05, |
|
"loss": 1.2451, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"grad_norm": 0.9751808643341064, |
|
"learning_rate": 8.510030527692979e-05, |
|
"loss": 1.2381, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"grad_norm": 0.896840512752533, |
|
"learning_rate": 8.507849978194505e-05, |
|
"loss": 1.2509, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"grad_norm": 1.074179768562317, |
|
"learning_rate": 8.505669428696031e-05, |
|
"loss": 1.2439, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"grad_norm": 1.0536302328109741, |
|
"learning_rate": 8.503488879197559e-05, |
|
"loss": 1.2795, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"grad_norm": 0.9011424779891968, |
|
"learning_rate": 8.501308329699085e-05, |
|
"loss": 1.2418, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"grad_norm": 0.9322314262390137, |
|
"learning_rate": 8.49912778020061e-05, |
|
"loss": 1.2576, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"grad_norm": 0.9793155193328857, |
|
"learning_rate": 8.496947230702137e-05, |
|
"loss": 1.2492, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"grad_norm": 0.9420814514160156, |
|
"learning_rate": 8.494766681203664e-05, |
|
"loss": 1.2373, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 0.8934997320175171, |
|
"learning_rate": 8.49258613170519e-05, |
|
"loss": 1.2433, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 1.0100373029708862, |
|
"learning_rate": 8.490405582206717e-05, |
|
"loss": 1.2397, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 0.9812464118003845, |
|
"learning_rate": 8.488225032708242e-05, |
|
"loss": 1.2536, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 1.0419830083847046, |
|
"learning_rate": 8.48604448320977e-05, |
|
"loss": 1.2531, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 1.0287178754806519, |
|
"learning_rate": 8.483863933711296e-05, |
|
"loss": 1.2853, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 0.9258010983467102, |
|
"learning_rate": 8.481683384212822e-05, |
|
"loss": 1.2384, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 1.0923179388046265, |
|
"learning_rate": 8.479502834714348e-05, |
|
"loss": 1.2388, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 1.026920199394226, |
|
"learning_rate": 8.477322285215875e-05, |
|
"loss": 1.2403, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 1.071996808052063, |
|
"learning_rate": 8.475141735717401e-05, |
|
"loss": 1.257, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"grad_norm": 1.0824863910675049, |
|
"learning_rate": 8.472961186218927e-05, |
|
"loss": 1.2358, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"grad_norm": 1.006395697593689, |
|
"learning_rate": 8.470780636720454e-05, |
|
"loss": 1.2675, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"grad_norm": 0.9629374146461487, |
|
"learning_rate": 8.468600087221981e-05, |
|
"loss": 1.2377, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"grad_norm": 0.9439448714256287, |
|
"learning_rate": 8.466419537723506e-05, |
|
"loss": 1.2269, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"grad_norm": 0.9413838386535645, |
|
"learning_rate": 8.464238988225033e-05, |
|
"loss": 1.248, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 0.9353733658790588, |
|
"learning_rate": 8.46205843872656e-05, |
|
"loss": 1.2535, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 1.0403653383255005, |
|
"learning_rate": 8.459877889228086e-05, |
|
"loss": 1.2323, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 0.8675696849822998, |
|
"learning_rate": 8.457697339729613e-05, |
|
"loss": 1.2712, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 0.9282375574111938, |
|
"learning_rate": 8.455516790231138e-05, |
|
"loss": 1.2259, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 0.9778069853782654, |
|
"learning_rate": 8.453336240732665e-05, |
|
"loss": 1.2499, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"grad_norm": 1.0154436826705933, |
|
"learning_rate": 8.451155691234192e-05, |
|
"loss": 1.2253, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"grad_norm": 0.9822314381599426, |
|
"learning_rate": 8.448975141735718e-05, |
|
"loss": 1.2583, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"grad_norm": 1.0584256649017334, |
|
"learning_rate": 8.446794592237244e-05, |
|
"loss": 1.2682, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"grad_norm": 1.035949945449829, |
|
"learning_rate": 8.44461404273877e-05, |
|
"loss": 1.2604, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 0.9688887596130371, |
|
"learning_rate": 8.442433493240297e-05, |
|
"loss": 1.2308, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 1.0668280124664307, |
|
"learning_rate": 8.440252943741823e-05, |
|
"loss": 1.2523, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 1.0507837533950806, |
|
"learning_rate": 8.43807239424335e-05, |
|
"loss": 1.2458, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 0.9705730676651001, |
|
"learning_rate": 8.435891844744876e-05, |
|
"loss": 1.2623, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 1.1198492050170898, |
|
"learning_rate": 8.433711295246402e-05, |
|
"loss": 1.2263, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"grad_norm": 1.090376853942871, |
|
"learning_rate": 8.431530745747928e-05, |
|
"loss": 1.2549, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"grad_norm": 0.9599369764328003, |
|
"learning_rate": 8.429350196249455e-05, |
|
"loss": 1.2453, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"grad_norm": 0.9473201036453247, |
|
"learning_rate": 8.427169646750982e-05, |
|
"loss": 1.2449, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"grad_norm": 1.0158095359802246, |
|
"learning_rate": 8.424989097252509e-05, |
|
"loss": 1.2395, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"grad_norm": 1.1401153802871704, |
|
"learning_rate": 8.422808547754034e-05, |
|
"loss": 1.2426, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"grad_norm": 0.9833976030349731, |
|
"learning_rate": 8.42062799825556e-05, |
|
"loss": 1.2238, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"grad_norm": 1.0531307458877563, |
|
"learning_rate": 8.418447448757088e-05, |
|
"loss": 1.2286, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"grad_norm": 0.9833014607429504, |
|
"learning_rate": 8.416266899258614e-05, |
|
"loss": 1.2483, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"grad_norm": 1.0215846300125122, |
|
"learning_rate": 8.41408634976014e-05, |
|
"loss": 1.2434, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"grad_norm": 0.9338911175727844, |
|
"learning_rate": 8.411905800261667e-05, |
|
"loss": 1.2263, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"grad_norm": 0.9091663360595703, |
|
"learning_rate": 8.409725250763193e-05, |
|
"loss": 1.2359, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"grad_norm": 0.9303663969039917, |
|
"learning_rate": 8.407544701264719e-05, |
|
"loss": 1.243, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"grad_norm": 0.9787565469741821, |
|
"learning_rate": 8.405364151766245e-05, |
|
"loss": 1.2444, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"grad_norm": 1.1064313650131226, |
|
"learning_rate": 8.403183602267772e-05, |
|
"loss": 1.2438, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"grad_norm": 0.9433283805847168, |
|
"learning_rate": 8.401003052769298e-05, |
|
"loss": 1.2442, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"grad_norm": 0.9914006590843201, |
|
"learning_rate": 8.398822503270824e-05, |
|
"loss": 1.2595, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"grad_norm": 1.1178406476974487, |
|
"learning_rate": 8.39664195377235e-05, |
|
"loss": 1.2223, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"grad_norm": 1.1177582740783691, |
|
"learning_rate": 8.394461404273878e-05, |
|
"loss": 1.2284, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 1.0288305282592773, |
|
"learning_rate": 8.392280854775405e-05, |
|
"loss": 1.2329, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 1.078165054321289, |
|
"learning_rate": 8.39010030527693e-05, |
|
"loss": 1.2149, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 1.0270469188690186, |
|
"learning_rate": 8.387919755778456e-05, |
|
"loss": 1.2453, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 1.142359733581543, |
|
"learning_rate": 8.385739206279984e-05, |
|
"loss": 1.2115, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 1.066074252128601, |
|
"learning_rate": 8.38355865678151e-05, |
|
"loss": 1.2282, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"grad_norm": 0.9854233860969543, |
|
"learning_rate": 8.381378107283036e-05, |
|
"loss": 1.25, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"grad_norm": 1.0901075601577759, |
|
"learning_rate": 8.379197557784561e-05, |
|
"loss": 1.2237, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"grad_norm": 1.1587127447128296, |
|
"learning_rate": 8.377017008286089e-05, |
|
"loss": 1.219, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"grad_norm": 0.9623563289642334, |
|
"learning_rate": 8.374836458787615e-05, |
|
"loss": 1.2311, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"grad_norm": 0.9470689296722412, |
|
"learning_rate": 8.372655909289141e-05, |
|
"loss": 1.2515, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"grad_norm": 0.9638876914978027, |
|
"learning_rate": 8.370475359790668e-05, |
|
"loss": 1.2532, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"grad_norm": 1.163567304611206, |
|
"learning_rate": 8.368294810292194e-05, |
|
"loss": 1.2615, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"grad_norm": 1.001160979270935, |
|
"learning_rate": 8.36611426079372e-05, |
|
"loss": 1.2472, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"grad_norm": 1.0169782638549805, |
|
"learning_rate": 8.363933711295247e-05, |
|
"loss": 1.2473, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"grad_norm": 0.9867805242538452, |
|
"learning_rate": 8.361753161796774e-05, |
|
"loss": 1.2452, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"grad_norm": 1.0535905361175537, |
|
"learning_rate": 8.359572612298299e-05, |
|
"loss": 1.2405, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"grad_norm": 0.9246835708618164, |
|
"learning_rate": 8.357392062799825e-05, |
|
"loss": 1.2522, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"grad_norm": 1.0927287340164185, |
|
"learning_rate": 8.355211513301352e-05, |
|
"loss": 1.2493, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"grad_norm": 1.054208755493164, |
|
"learning_rate": 8.35303096380288e-05, |
|
"loss": 1.263, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 0.9636792540550232, |
|
"learning_rate": 8.350850414304406e-05, |
|
"loss": 1.2426, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 1.0837719440460205, |
|
"learning_rate": 8.348669864805932e-05, |
|
"loss": 1.2265, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 0.9462710022926331, |
|
"learning_rate": 8.346489315307457e-05, |
|
"loss": 1.2242, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 0.987519383430481, |
|
"learning_rate": 8.344308765808985e-05, |
|
"loss": 1.2261, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 1.0755093097686768, |
|
"learning_rate": 8.342128216310511e-05, |
|
"loss": 1.2486, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 0.9885231852531433, |
|
"learning_rate": 8.339947666812037e-05, |
|
"loss": 1.2325, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 1.0870469808578491, |
|
"learning_rate": 8.337767117313564e-05, |
|
"loss": 1.2175, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 1.0006695985794067, |
|
"learning_rate": 8.33558656781509e-05, |
|
"loss": 1.2521, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 1.0880390405654907, |
|
"learning_rate": 8.333406018316616e-05, |
|
"loss": 1.2353, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"grad_norm": 0.9993226528167725, |
|
"learning_rate": 8.331225468818142e-05, |
|
"loss": 1.2365, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"grad_norm": 0.964745819568634, |
|
"learning_rate": 8.329044919319669e-05, |
|
"loss": 1.2566, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"grad_norm": 0.9665801525115967, |
|
"learning_rate": 8.326864369821195e-05, |
|
"loss": 1.2266, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"grad_norm": 1.0917197465896606, |
|
"learning_rate": 8.324683820322721e-05, |
|
"loss": 1.2457, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"grad_norm": 1.1263692378997803, |
|
"learning_rate": 8.322503270824248e-05, |
|
"loss": 1.2312, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"grad_norm": 0.9168413877487183, |
|
"learning_rate": 8.320322721325774e-05, |
|
"loss": 1.223, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"grad_norm": 0.9771096706390381, |
|
"learning_rate": 8.318142171827302e-05, |
|
"loss": 1.2219, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"grad_norm": 0.9901739358901978, |
|
"learning_rate": 8.315961622328828e-05, |
|
"loss": 1.2405, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"grad_norm": 1.004320502281189, |
|
"learning_rate": 8.313781072830353e-05, |
|
"loss": 1.2584, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 0.897678554058075, |
|
"learning_rate": 8.31160052333188e-05, |
|
"loss": 1.2359, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 0.9914141893386841, |
|
"learning_rate": 8.309419973833407e-05, |
|
"loss": 1.2269, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 1.1783164739608765, |
|
"learning_rate": 8.307239424334933e-05, |
|
"loss": 1.2208, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 1.0260601043701172, |
|
"learning_rate": 8.30505887483646e-05, |
|
"loss": 1.2206, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 0.9606086015701294, |
|
"learning_rate": 8.302878325337986e-05, |
|
"loss": 1.246, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"grad_norm": 1.0758907794952393, |
|
"learning_rate": 8.300697775839512e-05, |
|
"loss": 1.2386, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"grad_norm": 0.9541261792182922, |
|
"learning_rate": 8.298517226341038e-05, |
|
"loss": 1.2554, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"grad_norm": 1.130035161972046, |
|
"learning_rate": 8.296336676842565e-05, |
|
"loss": 1.2292, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"grad_norm": 0.9219099879264832, |
|
"learning_rate": 8.294156127344091e-05, |
|
"loss": 1.2486, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 0.9194048643112183, |
|
"learning_rate": 8.291975577845617e-05, |
|
"loss": 1.2065, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 1.0724278688430786, |
|
"learning_rate": 8.289795028347144e-05, |
|
"loss": 1.232, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 1.0829250812530518, |
|
"learning_rate": 8.28761447884867e-05, |
|
"loss": 1.2374, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 0.9441924691200256, |
|
"learning_rate": 8.285433929350198e-05, |
|
"loss": 1.2248, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 1.0257307291030884, |
|
"learning_rate": 8.283253379851722e-05, |
|
"loss": 1.2356, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 0.8646122813224792, |
|
"learning_rate": 8.281072830353249e-05, |
|
"loss": 1.2497, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 1.100232481956482, |
|
"learning_rate": 8.278892280854775e-05, |
|
"loss": 1.2365, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 1.0597792863845825, |
|
"learning_rate": 8.276711731356303e-05, |
|
"loss": 1.2403, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 1.0088367462158203, |
|
"learning_rate": 8.274531181857829e-05, |
|
"loss": 1.2281, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 1.0818982124328613, |
|
"learning_rate": 8.272350632359355e-05, |
|
"loss": 1.2427, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"grad_norm": 0.9281474947929382, |
|
"learning_rate": 8.27017008286088e-05, |
|
"loss": 1.2595, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"grad_norm": 0.9748603105545044, |
|
"learning_rate": 8.267989533362408e-05, |
|
"loss": 1.248, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"grad_norm": 1.027099370956421, |
|
"learning_rate": 8.265808983863934e-05, |
|
"loss": 1.2313, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"grad_norm": 1.0615408420562744, |
|
"learning_rate": 8.26362843436546e-05, |
|
"loss": 1.2549, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"grad_norm": 0.9190282225608826, |
|
"learning_rate": 8.261447884866987e-05, |
|
"loss": 1.2169, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"grad_norm": 0.9824718236923218, |
|
"learning_rate": 8.259267335368513e-05, |
|
"loss": 1.2505, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"grad_norm": 0.9848600029945374, |
|
"learning_rate": 8.25708678587004e-05, |
|
"loss": 1.2414, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"grad_norm": 0.9373934268951416, |
|
"learning_rate": 8.254906236371566e-05, |
|
"loss": 1.2294, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"grad_norm": 1.0315806865692139, |
|
"learning_rate": 8.252725686873093e-05, |
|
"loss": 1.2259, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"grad_norm": 1.0654377937316895, |
|
"learning_rate": 8.250545137374618e-05, |
|
"loss": 1.249, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"grad_norm": 1.0188405513763428, |
|
"learning_rate": 8.248364587876145e-05, |
|
"loss": 1.2361, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"grad_norm": 0.9202408790588379, |
|
"learning_rate": 8.246184038377671e-05, |
|
"loss": 1.2344, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"grad_norm": 0.953535795211792, |
|
"learning_rate": 8.244003488879199e-05, |
|
"loss": 1.2439, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 0.8910773992538452, |
|
"learning_rate": 8.241822939380725e-05, |
|
"loss": 1.2417, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 1.0123344659805298, |
|
"learning_rate": 8.23964238988225e-05, |
|
"loss": 1.2437, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 0.9692454934120178, |
|
"learning_rate": 8.237461840383776e-05, |
|
"loss": 1.2414, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 1.2110908031463623, |
|
"learning_rate": 8.235281290885304e-05, |
|
"loss": 1.2273, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 0.9399771690368652, |
|
"learning_rate": 8.23310074138683e-05, |
|
"loss": 1.2305, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"grad_norm": 1.0485948324203491, |
|
"learning_rate": 8.230920191888356e-05, |
|
"loss": 1.2243, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"grad_norm": 1.1290273666381836, |
|
"learning_rate": 8.228739642389883e-05, |
|
"loss": 1.2647, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"grad_norm": 1.113707184791565, |
|
"learning_rate": 8.226559092891409e-05, |
|
"loss": 1.2396, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"grad_norm": 1.161978006362915, |
|
"learning_rate": 8.224378543392935e-05, |
|
"loss": 1.2371, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"grad_norm": 1.075077772140503, |
|
"learning_rate": 8.222197993894462e-05, |
|
"loss": 1.2326, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"grad_norm": 0.9579611420631409, |
|
"learning_rate": 8.220017444395988e-05, |
|
"loss": 1.2212, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"grad_norm": 1.0509251356124878, |
|
"learning_rate": 8.217836894897514e-05, |
|
"loss": 1.2234, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"grad_norm": 1.02772057056427, |
|
"learning_rate": 8.21565634539904e-05, |
|
"loss": 1.212, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"grad_norm": 1.0468199253082275, |
|
"learning_rate": 8.213475795900567e-05, |
|
"loss": 1.2328, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 0.9836091995239258, |
|
"learning_rate": 8.211295246402095e-05, |
|
"loss": 1.2368, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 1.0582927465438843, |
|
"learning_rate": 8.209114696903621e-05, |
|
"loss": 1.2466, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 1.039549708366394, |
|
"learning_rate": 8.206934147405146e-05, |
|
"loss": 1.2334, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 0.9211510419845581, |
|
"learning_rate": 8.204753597906672e-05, |
|
"loss": 1.2205, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 1.019851565361023, |
|
"learning_rate": 8.2025730484082e-05, |
|
"loss": 1.2416, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"grad_norm": 1.0609748363494873, |
|
"learning_rate": 8.200392498909726e-05, |
|
"loss": 1.2315, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"grad_norm": 1.1158742904663086, |
|
"learning_rate": 8.198211949411252e-05, |
|
"loss": 1.2485, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"grad_norm": 0.8996789455413818, |
|
"learning_rate": 8.196031399912779e-05, |
|
"loss": 1.2309, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"grad_norm": 0.9898722171783447, |
|
"learning_rate": 8.193850850414305e-05, |
|
"loss": 1.236, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"grad_norm": 1.1336474418640137, |
|
"learning_rate": 8.191670300915831e-05, |
|
"loss": 1.2375, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"grad_norm": 0.9630258679389954, |
|
"learning_rate": 8.189489751417358e-05, |
|
"loss": 1.2462, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"grad_norm": 0.9450762271881104, |
|
"learning_rate": 8.187309201918884e-05, |
|
"loss": 1.221, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"grad_norm": 0.9798605442047119, |
|
"learning_rate": 8.18512865242041e-05, |
|
"loss": 1.2222, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"grad_norm": 0.9023801684379578, |
|
"learning_rate": 8.182948102921936e-05, |
|
"loss": 1.2193, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"grad_norm": 0.9918519258499146, |
|
"learning_rate": 8.180767553423463e-05, |
|
"loss": 1.2538, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"grad_norm": 1.078640341758728, |
|
"learning_rate": 8.178587003924989e-05, |
|
"loss": 1.2239, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"grad_norm": 1.1001946926116943, |
|
"learning_rate": 8.176406454426517e-05, |
|
"loss": 1.2542, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"grad_norm": 0.9115540385246277, |
|
"learning_rate": 8.174225904928042e-05, |
|
"loss": 1.2231, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"grad_norm": 1.0351630449295044, |
|
"learning_rate": 8.172045355429568e-05, |
|
"loss": 1.2328, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"grad_norm": 1.1193772554397583, |
|
"learning_rate": 8.169864805931094e-05, |
|
"loss": 1.2344, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"grad_norm": 0.926569402217865, |
|
"learning_rate": 8.167684256432622e-05, |
|
"loss": 1.2318, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"grad_norm": 1.1995497941970825, |
|
"learning_rate": 8.165503706934148e-05, |
|
"loss": 1.2645, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"grad_norm": 1.0718098878860474, |
|
"learning_rate": 8.163323157435673e-05, |
|
"loss": 1.2372, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 1.0319968461990356, |
|
"learning_rate": 8.161142607937201e-05, |
|
"loss": 1.222, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 1.0868433713912964, |
|
"learning_rate": 8.158962058438727e-05, |
|
"loss": 1.2381, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 1.0332001447677612, |
|
"learning_rate": 8.156781508940253e-05, |
|
"loss": 1.208, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 1.050507664680481, |
|
"learning_rate": 8.15460095944178e-05, |
|
"loss": 1.2276, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 0.9764347672462463, |
|
"learning_rate": 8.152420409943306e-05, |
|
"loss": 1.2289, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"grad_norm": 0.9142500758171082, |
|
"learning_rate": 8.150239860444832e-05, |
|
"loss": 1.2109, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"grad_norm": 1.028554916381836, |
|
"learning_rate": 8.148059310946359e-05, |
|
"loss": 1.2245, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"grad_norm": 1.09976327419281, |
|
"learning_rate": 8.145878761447885e-05, |
|
"loss": 1.2387, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"grad_norm": 1.0482656955718994, |
|
"learning_rate": 8.143698211949413e-05, |
|
"loss": 1.2225, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 0.953663170337677, |
|
"learning_rate": 8.141517662450938e-05, |
|
"loss": 1.2605, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 1.0766589641571045, |
|
"learning_rate": 8.139337112952464e-05, |
|
"loss": 1.2348, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 1.1204911470413208, |
|
"learning_rate": 8.13715656345399e-05, |
|
"loss": 1.2248, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 1.0836663246154785, |
|
"learning_rate": 8.134976013955518e-05, |
|
"loss": 1.2463, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 1.0038310289382935, |
|
"learning_rate": 8.132795464457044e-05, |
|
"loss": 1.2415, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"grad_norm": 0.9727823138237, |
|
"learning_rate": 8.130614914958569e-05, |
|
"loss": 1.2291, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"grad_norm": 0.9913771748542786, |
|
"learning_rate": 8.128434365460095e-05, |
|
"loss": 1.2374, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"grad_norm": 1.0077624320983887, |
|
"learning_rate": 8.126253815961623e-05, |
|
"loss": 1.2126, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"grad_norm": 0.9802316427230835, |
|
"learning_rate": 8.12407326646315e-05, |
|
"loss": 1.2084, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"grad_norm": 1.1375538110733032, |
|
"learning_rate": 8.121892716964676e-05, |
|
"loss": 1.231, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 1.0553092956542969, |
|
"learning_rate": 8.119712167466202e-05, |
|
"loss": 1.2132, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 0.9583929777145386, |
|
"learning_rate": 8.117531617967728e-05, |
|
"loss": 1.2492, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 1.1101999282836914, |
|
"learning_rate": 8.115351068469255e-05, |
|
"loss": 1.2381, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 0.9837037920951843, |
|
"learning_rate": 8.113170518970781e-05, |
|
"loss": 1.2122, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"grad_norm": 0.9561728835105896, |
|
"learning_rate": 8.110989969472309e-05, |
|
"loss": 1.2371, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"grad_norm": 1.0024539232254028, |
|
"learning_rate": 8.108809419973834e-05, |
|
"loss": 1.2421, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"grad_norm": 0.8823496103286743, |
|
"learning_rate": 8.10662887047536e-05, |
|
"loss": 1.2221, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"grad_norm": 0.9598950743675232, |
|
"learning_rate": 8.104448320976886e-05, |
|
"loss": 1.2043, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"grad_norm": 1.165281057357788, |
|
"learning_rate": 8.102267771478414e-05, |
|
"loss": 1.2261, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"grad_norm": 0.9209827184677124, |
|
"learning_rate": 8.10008722197994e-05, |
|
"loss": 1.2196, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"grad_norm": 1.023848056793213, |
|
"learning_rate": 8.097906672481465e-05, |
|
"loss": 1.2393, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"grad_norm": 1.0043749809265137, |
|
"learning_rate": 8.095726122982991e-05, |
|
"loss": 1.2362, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"grad_norm": 0.9257699251174927, |
|
"learning_rate": 8.093545573484519e-05, |
|
"loss": 1.2258, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 1.1696765422821045, |
|
"learning_rate": 8.091365023986045e-05, |
|
"loss": 1.2459, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 0.9257934093475342, |
|
"learning_rate": 8.089184474487572e-05, |
|
"loss": 1.2492, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 1.1503798961639404, |
|
"learning_rate": 8.087003924989097e-05, |
|
"loss": 1.2311, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 1.1405220031738281, |
|
"learning_rate": 8.084823375490624e-05, |
|
"loss": 1.2409, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 0.976625382900238, |
|
"learning_rate": 8.08264282599215e-05, |
|
"loss": 1.2266, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"grad_norm": 0.9233745336532593, |
|
"learning_rate": 8.080462276493677e-05, |
|
"loss": 1.2261, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"grad_norm": 1.0994141101837158, |
|
"learning_rate": 8.078281726995203e-05, |
|
"loss": 1.2352, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"grad_norm": 0.9999457001686096, |
|
"learning_rate": 8.07610117749673e-05, |
|
"loss": 1.2238, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"grad_norm": 1.0037119388580322, |
|
"learning_rate": 8.073920627998256e-05, |
|
"loss": 1.2439, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"grad_norm": 0.9493910670280457, |
|
"learning_rate": 8.071740078499782e-05, |
|
"loss": 1.2253, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"grad_norm": 1.099271535873413, |
|
"learning_rate": 8.069559529001308e-05, |
|
"loss": 1.211, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"grad_norm": 0.9729533791542053, |
|
"learning_rate": 8.067378979502836e-05, |
|
"loss": 1.2257, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"grad_norm": 1.112057089805603, |
|
"learning_rate": 8.065198430004361e-05, |
|
"loss": 1.2092, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"grad_norm": 0.9645751714706421, |
|
"learning_rate": 8.063017880505887e-05, |
|
"loss": 1.2123, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"grad_norm": 1.0263340473175049, |
|
"learning_rate": 8.060837331007415e-05, |
|
"loss": 1.2033, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"grad_norm": 1.1131114959716797, |
|
"learning_rate": 8.058656781508941e-05, |
|
"loss": 1.2303, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"grad_norm": 1.1425633430480957, |
|
"learning_rate": 8.056476232010468e-05, |
|
"loss": 1.2166, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"grad_norm": 0.9223284721374512, |
|
"learning_rate": 8.054295682511992e-05, |
|
"loss": 1.2588, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"grad_norm": 0.9477842450141907, |
|
"learning_rate": 8.05211513301352e-05, |
|
"loss": 1.2028, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"grad_norm": 1.0649006366729736, |
|
"learning_rate": 8.049934583515046e-05, |
|
"loss": 1.2238, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"grad_norm": 1.0043710470199585, |
|
"learning_rate": 8.047754034016573e-05, |
|
"loss": 1.2301, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"grad_norm": 1.0217610597610474, |
|
"learning_rate": 8.045573484518099e-05, |
|
"loss": 1.2406, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"grad_norm": 0.9688403606414795, |
|
"learning_rate": 8.043392935019625e-05, |
|
"loss": 1.2364, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.095987319946289, |
|
"learning_rate": 8.041212385521152e-05, |
|
"loss": 1.241, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.9398607611656189, |
|
"learning_rate": 8.039031836022678e-05, |
|
"loss": 1.226, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.9815939664840698, |
|
"learning_rate": 8.036851286524204e-05, |
|
"loss": 1.2181, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 1.2817823886871338, |
|
"eval_runtime": 1495.0675, |
|
"eval_samples_per_second": 258.75, |
|
"eval_steps_per_second": 4.043, |
|
"step": 9212 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.0157184600830078, |
|
"learning_rate": 8.034670737025732e-05, |
|
"loss": 1.2142, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.9625092148780823, |
|
"learning_rate": 8.032490187527257e-05, |
|
"loss": 1.2089, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"grad_norm": 0.9196017384529114, |
|
"learning_rate": 8.030309638028783e-05, |
|
"loss": 1.2335, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"grad_norm": 0.9308544397354126, |
|
"learning_rate": 8.02812908853031e-05, |
|
"loss": 1.2163, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"grad_norm": 1.2144242525100708, |
|
"learning_rate": 8.025948539031837e-05, |
|
"loss": 1.2008, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"grad_norm": 0.9780566692352295, |
|
"learning_rate": 8.023767989533363e-05, |
|
"loss": 1.1919, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"grad_norm": 0.9934610724449158, |
|
"learning_rate": 8.021587440034888e-05, |
|
"loss": 1.1813, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 1.1047219038009644, |
|
"learning_rate": 8.019406890536415e-05, |
|
"loss": 1.1887, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 1.0617597103118896, |
|
"learning_rate": 8.017226341037942e-05, |
|
"loss": 1.2142, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 0.9656373858451843, |
|
"learning_rate": 8.015045791539469e-05, |
|
"loss": 1.1962, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 0.9934256076812744, |
|
"learning_rate": 8.012865242040995e-05, |
|
"loss": 1.2093, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 1.0616453886032104, |
|
"learning_rate": 8.010684692542521e-05, |
|
"loss": 1.227, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 1.0761624574661255, |
|
"learning_rate": 8.008504143044048e-05, |
|
"loss": 1.2126, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 1.06252920627594, |
|
"learning_rate": 8.006323593545574e-05, |
|
"loss": 1.1966, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 0.9828883409500122, |
|
"learning_rate": 8.0041430440471e-05, |
|
"loss": 1.2032, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 1.0415362119674683, |
|
"learning_rate": 8.001962494548628e-05, |
|
"loss": 1.2069, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 0.9932116866111755, |
|
"learning_rate": 7.999781945050153e-05, |
|
"loss": 1.2099, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 1.0453740358352661, |
|
"learning_rate": 7.997601395551679e-05, |
|
"loss": 1.1908, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 0.9478277564048767, |
|
"learning_rate": 7.995420846053205e-05, |
|
"loss": 1.2016, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 0.9447776079177856, |
|
"learning_rate": 7.993240296554733e-05, |
|
"loss": 1.2163, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"grad_norm": 0.9693462252616882, |
|
"learning_rate": 7.991059747056259e-05, |
|
"loss": 1.1871, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"grad_norm": 1.2381738424301147, |
|
"learning_rate": 7.988879197557784e-05, |
|
"loss": 1.214, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"grad_norm": 0.9551769495010376, |
|
"learning_rate": 7.98669864805931e-05, |
|
"loss": 1.2026, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"grad_norm": 1.009376883506775, |
|
"learning_rate": 7.984518098560838e-05, |
|
"loss": 1.1991, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"grad_norm": 0.9546257257461548, |
|
"learning_rate": 7.982337549062365e-05, |
|
"loss": 1.2164, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 0.9941860437393188, |
|
"learning_rate": 7.980156999563891e-05, |
|
"loss": 1.2111, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 1.211512565612793, |
|
"learning_rate": 7.977976450065416e-05, |
|
"loss": 1.1795, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 1.004779577255249, |
|
"learning_rate": 7.975795900566943e-05, |
|
"loss": 1.2049, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 1.0823005437850952, |
|
"learning_rate": 7.97361535106847e-05, |
|
"loss": 1.1886, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 1.0418225526809692, |
|
"learning_rate": 7.971434801569996e-05, |
|
"loss": 1.2105, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"grad_norm": 1.1182845830917358, |
|
"learning_rate": 7.969254252071522e-05, |
|
"loss": 1.1897, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"grad_norm": 0.946642279624939, |
|
"learning_rate": 7.967073702573049e-05, |
|
"loss": 1.199, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"grad_norm": 1.1157629489898682, |
|
"learning_rate": 7.964893153074575e-05, |
|
"loss": 1.2294, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"grad_norm": 1.053207516670227, |
|
"learning_rate": 7.962712603576101e-05, |
|
"loss": 1.2412, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 0.9756922721862793, |
|
"learning_rate": 7.960532054077629e-05, |
|
"loss": 1.1976, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 1.049428105354309, |
|
"learning_rate": 7.958351504579155e-05, |
|
"loss": 1.2254, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 0.9671922922134399, |
|
"learning_rate": 7.95617095508068e-05, |
|
"loss": 1.1905, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 1.0883835554122925, |
|
"learning_rate": 7.953990405582206e-05, |
|
"loss": 1.2032, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 1.080729365348816, |
|
"learning_rate": 7.951809856083734e-05, |
|
"loss": 1.216, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"grad_norm": 0.9762791395187378, |
|
"learning_rate": 7.94962930658526e-05, |
|
"loss": 1.2167, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"grad_norm": 1.1527519226074219, |
|
"learning_rate": 7.947448757086787e-05, |
|
"loss": 1.1682, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"grad_norm": 1.0505051612854004, |
|
"learning_rate": 7.945268207588312e-05, |
|
"loss": 1.211, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"grad_norm": 1.1166177988052368, |
|
"learning_rate": 7.94308765808984e-05, |
|
"loss": 1.1763, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"grad_norm": 1.038783073425293, |
|
"learning_rate": 7.940907108591366e-05, |
|
"loss": 1.2113, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 1.0138919353485107, |
|
"learning_rate": 7.938726559092892e-05, |
|
"loss": 1.214, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 0.8989730477333069, |
|
"learning_rate": 7.936546009594418e-05, |
|
"loss": 1.1975, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 0.9866936206817627, |
|
"learning_rate": 7.934365460095945e-05, |
|
"loss": 1.2163, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 0.9352193474769592, |
|
"learning_rate": 7.932184910597471e-05, |
|
"loss": 1.1936, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"grad_norm": 0.9865077137947083, |
|
"learning_rate": 7.930004361098997e-05, |
|
"loss": 1.2279, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"grad_norm": 0.9269611835479736, |
|
"learning_rate": 7.927823811600523e-05, |
|
"loss": 1.2089, |
|
"step": 9710 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"grad_norm": 1.0865782499313354, |
|
"learning_rate": 7.92564326210205e-05, |
|
"loss": 1.2073, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"grad_norm": 1.077241063117981, |
|
"learning_rate": 7.923462712603576e-05, |
|
"loss": 1.1952, |
|
"step": 9730 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"grad_norm": 1.1019902229309082, |
|
"learning_rate": 7.921282163105102e-05, |
|
"loss": 1.1845, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"grad_norm": 1.1047565937042236, |
|
"learning_rate": 7.919101613606629e-05, |
|
"loss": 1.2115, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"grad_norm": 1.038865327835083, |
|
"learning_rate": 7.916921064108156e-05, |
|
"loss": 1.1764, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"grad_norm": 1.039838194847107, |
|
"learning_rate": 7.914740514609683e-05, |
|
"loss": 1.2061, |
|
"step": 9770 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"grad_norm": 1.1482833623886108, |
|
"learning_rate": 7.912559965111208e-05, |
|
"loss": 1.1819, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"grad_norm": 1.2092708349227905, |
|
"learning_rate": 7.910379415612735e-05, |
|
"loss": 1.2204, |
|
"step": 9790 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"grad_norm": 0.9620797634124756, |
|
"learning_rate": 7.908198866114262e-05, |
|
"loss": 1.2282, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"grad_norm": 0.9821200966835022, |
|
"learning_rate": 7.906018316615788e-05, |
|
"loss": 1.1928, |
|
"step": 9810 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"grad_norm": 0.9970041513442993, |
|
"learning_rate": 7.903837767117314e-05, |
|
"loss": 1.2293, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"grad_norm": 1.0370044708251953, |
|
"learning_rate": 7.90165721761884e-05, |
|
"loss": 1.2015, |
|
"step": 9830 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"grad_norm": 0.9988645911216736, |
|
"learning_rate": 7.899476668120367e-05, |
|
"loss": 1.1827, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"grad_norm": 1.0234349966049194, |
|
"learning_rate": 7.897296118621893e-05, |
|
"loss": 1.2185, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"grad_norm": 1.1477036476135254, |
|
"learning_rate": 7.89511556912342e-05, |
|
"loss": 1.2108, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"grad_norm": 1.1326051950454712, |
|
"learning_rate": 7.892935019624946e-05, |
|
"loss": 1.1785, |
|
"step": 9870 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"grad_norm": 1.003237009048462, |
|
"learning_rate": 7.890754470126472e-05, |
|
"loss": 1.2082, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"grad_norm": 1.0607051849365234, |
|
"learning_rate": 7.888573920627998e-05, |
|
"loss": 1.2112, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"grad_norm": 1.0867217779159546, |
|
"learning_rate": 7.886393371129525e-05, |
|
"loss": 1.1845, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"grad_norm": 0.945563018321991, |
|
"learning_rate": 7.884212821631052e-05, |
|
"loss": 1.1925, |
|
"step": 9910 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"grad_norm": 1.0693022012710571, |
|
"learning_rate": 7.882032272132579e-05, |
|
"loss": 1.1956, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"grad_norm": 0.9993180632591248, |
|
"learning_rate": 7.879851722634103e-05, |
|
"loss": 1.1965, |
|
"step": 9930 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"grad_norm": 1.010133147239685, |
|
"learning_rate": 7.87767117313563e-05, |
|
"loss": 1.2168, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"grad_norm": 1.0953561067581177, |
|
"learning_rate": 7.875490623637157e-05, |
|
"loss": 1.2114, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"grad_norm": 0.9444001317024231, |
|
"learning_rate": 7.873310074138684e-05, |
|
"loss": 1.1988, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"grad_norm": 0.9980970621109009, |
|
"learning_rate": 7.87112952464021e-05, |
|
"loss": 1.2275, |
|
"step": 9970 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"grad_norm": 1.0584611892700195, |
|
"learning_rate": 7.868948975141735e-05, |
|
"loss": 1.2105, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"grad_norm": 1.1327629089355469, |
|
"learning_rate": 7.866768425643263e-05, |
|
"loss": 1.2022, |
|
"step": 9990 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"grad_norm": 0.981350302696228, |
|
"learning_rate": 7.864587876144789e-05, |
|
"loss": 1.2151, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"grad_norm": 1.1142750978469849, |
|
"learning_rate": 7.862407326646315e-05, |
|
"loss": 1.1931, |
|
"step": 10010 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"grad_norm": 1.0601882934570312, |
|
"learning_rate": 7.860226777147842e-05, |
|
"loss": 1.2141, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"grad_norm": 0.9991333484649658, |
|
"learning_rate": 7.858046227649368e-05, |
|
"loss": 1.1921, |
|
"step": 10030 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"grad_norm": 1.1021018028259277, |
|
"learning_rate": 7.855865678150894e-05, |
|
"loss": 1.2225, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"grad_norm": 1.0568020343780518, |
|
"learning_rate": 7.85368512865242e-05, |
|
"loss": 1.2427, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"grad_norm": 0.9811879992485046, |
|
"learning_rate": 7.851504579153948e-05, |
|
"loss": 1.1997, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"grad_norm": 1.0988446474075317, |
|
"learning_rate": 7.849324029655473e-05, |
|
"loss": 1.2156, |
|
"step": 10070 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"grad_norm": 1.0393906831741333, |
|
"learning_rate": 7.847143480157e-05, |
|
"loss": 1.2258, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"grad_norm": 1.1017202138900757, |
|
"learning_rate": 7.844962930658526e-05, |
|
"loss": 1.2069, |
|
"step": 10090 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"grad_norm": 1.1102749109268188, |
|
"learning_rate": 7.842782381160053e-05, |
|
"loss": 1.2256, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"grad_norm": 1.0270189046859741, |
|
"learning_rate": 7.84060183166158e-05, |
|
"loss": 1.2174, |
|
"step": 10110 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"grad_norm": 1.0221537351608276, |
|
"learning_rate": 7.838421282163106e-05, |
|
"loss": 1.1968, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"grad_norm": 0.95604407787323, |
|
"learning_rate": 7.836240732664631e-05, |
|
"loss": 1.213, |
|
"step": 10130 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"grad_norm": 0.9393739700317383, |
|
"learning_rate": 7.834060183166159e-05, |
|
"loss": 1.2182, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"grad_norm": 1.014799952507019, |
|
"learning_rate": 7.831879633667685e-05, |
|
"loss": 1.2021, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"grad_norm": 1.0287479162216187, |
|
"learning_rate": 7.829699084169211e-05, |
|
"loss": 1.2114, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"grad_norm": 1.0790306329727173, |
|
"learning_rate": 7.827736589620584e-05, |
|
"loss": 1.1874, |
|
"step": 10170 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"grad_norm": 0.9588958621025085, |
|
"learning_rate": 7.82555604012211e-05, |
|
"loss": 1.2191, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"grad_norm": 0.9004745483398438, |
|
"learning_rate": 7.823375490623638e-05, |
|
"loss": 1.1933, |
|
"step": 10190 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"grad_norm": 1.0742331743240356, |
|
"learning_rate": 7.821194941125164e-05, |
|
"loss": 1.2128, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"grad_norm": 1.072489857673645, |
|
"learning_rate": 7.81901439162669e-05, |
|
"loss": 1.2143, |
|
"step": 10210 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"grad_norm": 0.9534905552864075, |
|
"learning_rate": 7.816833842128217e-05, |
|
"loss": 1.2206, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"grad_norm": 1.0694421529769897, |
|
"learning_rate": 7.814653292629743e-05, |
|
"loss": 1.2051, |
|
"step": 10230 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"grad_norm": 0.9729447364807129, |
|
"learning_rate": 7.81247274313127e-05, |
|
"loss": 1.2234, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"grad_norm": 1.0395437479019165, |
|
"learning_rate": 7.810292193632796e-05, |
|
"loss": 1.1977, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"grad_norm": 0.999451756477356, |
|
"learning_rate": 7.808111644134322e-05, |
|
"loss": 1.2053, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"grad_norm": 1.1238023042678833, |
|
"learning_rate": 7.805931094635848e-05, |
|
"loss": 1.2295, |
|
"step": 10270 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"grad_norm": 1.0689754486083984, |
|
"learning_rate": 7.803750545137375e-05, |
|
"loss": 1.2059, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"grad_norm": 0.9754849672317505, |
|
"learning_rate": 7.801569995638901e-05, |
|
"loss": 1.206, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"grad_norm": 1.02662193775177, |
|
"learning_rate": 7.799389446140429e-05, |
|
"loss": 1.1967, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"grad_norm": 1.1547129154205322, |
|
"learning_rate": 7.797208896641954e-05, |
|
"loss": 1.211, |
|
"step": 10310 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"grad_norm": 0.9812795519828796, |
|
"learning_rate": 7.79502834714348e-05, |
|
"loss": 1.1928, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"grad_norm": 1.0706185102462769, |
|
"learning_rate": 7.792847797645006e-05, |
|
"loss": 1.1914, |
|
"step": 10330 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"grad_norm": 1.0410836935043335, |
|
"learning_rate": 7.790667248146534e-05, |
|
"loss": 1.2002, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"grad_norm": 0.9746688008308411, |
|
"learning_rate": 7.78848669864806e-05, |
|
"loss": 1.1863, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"grad_norm": 0.8778429627418518, |
|
"learning_rate": 7.786524204099433e-05, |
|
"loss": 1.2383, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"grad_norm": 0.969650149345398, |
|
"learning_rate": 7.78434365460096e-05, |
|
"loss": 1.177, |
|
"step": 10370 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"grad_norm": 1.015781283378601, |
|
"learning_rate": 7.782163105102486e-05, |
|
"loss": 1.1838, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"grad_norm": 0.8965770602226257, |
|
"learning_rate": 7.779982555604013e-05, |
|
"loss": 1.2175, |
|
"step": 10390 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"grad_norm": 1.007692575454712, |
|
"learning_rate": 7.77780200610554e-05, |
|
"loss": 1.1978, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"grad_norm": 0.9334578514099121, |
|
"learning_rate": 7.775621456607065e-05, |
|
"loss": 1.1887, |
|
"step": 10410 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"grad_norm": 0.9570727348327637, |
|
"learning_rate": 7.773440907108591e-05, |
|
"loss": 1.211, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"grad_norm": 1.0146620273590088, |
|
"learning_rate": 7.771260357610119e-05, |
|
"loss": 1.2188, |
|
"step": 10430 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"grad_norm": 1.0868462324142456, |
|
"learning_rate": 7.769079808111645e-05, |
|
"loss": 1.2147, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"grad_norm": 1.062110185623169, |
|
"learning_rate": 7.766899258613171e-05, |
|
"loss": 1.2172, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"grad_norm": 0.950108528137207, |
|
"learning_rate": 7.764718709114697e-05, |
|
"loss": 1.2077, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"grad_norm": 1.029308795928955, |
|
"learning_rate": 7.762538159616224e-05, |
|
"loss": 1.2112, |
|
"step": 10470 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"grad_norm": 0.9809032678604126, |
|
"learning_rate": 7.76035761011775e-05, |
|
"loss": 1.2115, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"grad_norm": 1.0070390701293945, |
|
"learning_rate": 7.758177060619276e-05, |
|
"loss": 1.2032, |
|
"step": 10490 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"grad_norm": 1.1221727132797241, |
|
"learning_rate": 7.755996511120803e-05, |
|
"loss": 1.2164, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"grad_norm": 1.013219952583313, |
|
"learning_rate": 7.753815961622329e-05, |
|
"loss": 1.1912, |
|
"step": 10510 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"grad_norm": 1.0602985620498657, |
|
"learning_rate": 7.751635412123855e-05, |
|
"loss": 1.1607, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"grad_norm": 1.009325385093689, |
|
"learning_rate": 7.749454862625382e-05, |
|
"loss": 1.1943, |
|
"step": 10530 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"grad_norm": 1.01610267162323, |
|
"learning_rate": 7.747274313126909e-05, |
|
"loss": 1.2036, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"grad_norm": 0.9865471720695496, |
|
"learning_rate": 7.745093763628436e-05, |
|
"loss": 1.1951, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"grad_norm": 1.1565035581588745, |
|
"learning_rate": 7.74291321412996e-05, |
|
"loss": 1.2132, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"grad_norm": 0.9530940651893616, |
|
"learning_rate": 7.740732664631487e-05, |
|
"loss": 1.191, |
|
"step": 10570 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"grad_norm": 1.1055086851119995, |
|
"learning_rate": 7.738552115133014e-05, |
|
"loss": 1.2292, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"grad_norm": 1.0695475339889526, |
|
"learning_rate": 7.736371565634541e-05, |
|
"loss": 1.1937, |
|
"step": 10590 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"grad_norm": 0.991439163684845, |
|
"learning_rate": 7.734191016136067e-05, |
|
"loss": 1.2117, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"grad_norm": 0.9743112921714783, |
|
"learning_rate": 7.732010466637592e-05, |
|
"loss": 1.2275, |
|
"step": 10610 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"grad_norm": 1.030121922492981, |
|
"learning_rate": 7.72982991713912e-05, |
|
"loss": 1.1893, |
|
"step": 10620 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"grad_norm": 1.0691959857940674, |
|
"learning_rate": 7.727649367640646e-05, |
|
"loss": 1.2044, |
|
"step": 10630 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"grad_norm": 1.141326904296875, |
|
"learning_rate": 7.725468818142172e-05, |
|
"loss": 1.2208, |
|
"step": 10640 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"grad_norm": 1.0179444551467896, |
|
"learning_rate": 7.723288268643699e-05, |
|
"loss": 1.1901, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"grad_norm": 1.1256074905395508, |
|
"learning_rate": 7.721107719145225e-05, |
|
"loss": 1.2, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"grad_norm": 1.0997061729431152, |
|
"learning_rate": 7.718927169646751e-05, |
|
"loss": 1.194, |
|
"step": 10670 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"grad_norm": 1.0382623672485352, |
|
"learning_rate": 7.716746620148277e-05, |
|
"loss": 1.2277, |
|
"step": 10680 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"grad_norm": 1.0295804738998413, |
|
"learning_rate": 7.714566070649805e-05, |
|
"loss": 1.1857, |
|
"step": 10690 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"grad_norm": 1.0594016313552856, |
|
"learning_rate": 7.71238552115133e-05, |
|
"loss": 1.1955, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"grad_norm": 1.0921293497085571, |
|
"learning_rate": 7.710204971652856e-05, |
|
"loss": 1.1836, |
|
"step": 10710 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"grad_norm": 1.0477246046066284, |
|
"learning_rate": 7.708024422154383e-05, |
|
"loss": 1.2023, |
|
"step": 10720 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"grad_norm": 1.0246959924697876, |
|
"learning_rate": 7.70584387265591e-05, |
|
"loss": 1.222, |
|
"step": 10730 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"grad_norm": 1.0640301704406738, |
|
"learning_rate": 7.703663323157437e-05, |
|
"loss": 1.1974, |
|
"step": 10740 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"grad_norm": 1.0652765035629272, |
|
"learning_rate": 7.701482773658963e-05, |
|
"loss": 1.1997, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"grad_norm": 0.9220369458198547, |
|
"learning_rate": 7.699302224160488e-05, |
|
"loss": 1.212, |
|
"step": 10760 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"grad_norm": 0.9531814455986023, |
|
"learning_rate": 7.697121674662016e-05, |
|
"loss": 1.1686, |
|
"step": 10770 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"grad_norm": 1.1248044967651367, |
|
"learning_rate": 7.694941125163542e-05, |
|
"loss": 1.1971, |
|
"step": 10780 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"grad_norm": 1.0232545137405396, |
|
"learning_rate": 7.692760575665068e-05, |
|
"loss": 1.194, |
|
"step": 10790 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"grad_norm": 1.0724860429763794, |
|
"learning_rate": 7.690580026166594e-05, |
|
"loss": 1.1936, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"grad_norm": 1.036474347114563, |
|
"learning_rate": 7.688399476668121e-05, |
|
"loss": 1.2078, |
|
"step": 10810 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"grad_norm": 1.0231555700302124, |
|
"learning_rate": 7.686218927169647e-05, |
|
"loss": 1.2056, |
|
"step": 10820 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"grad_norm": 0.9879153370857239, |
|
"learning_rate": 7.684038377671173e-05, |
|
"loss": 1.2191, |
|
"step": 10830 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"grad_norm": 1.0709577798843384, |
|
"learning_rate": 7.6818578281727e-05, |
|
"loss": 1.198, |
|
"step": 10840 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"grad_norm": 1.0138386487960815, |
|
"learning_rate": 7.679677278674226e-05, |
|
"loss": 1.2284, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"grad_norm": 1.0676188468933105, |
|
"learning_rate": 7.677496729175752e-05, |
|
"loss": 1.2004, |
|
"step": 10860 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"grad_norm": 1.0372511148452759, |
|
"learning_rate": 7.675316179677279e-05, |
|
"loss": 1.167, |
|
"step": 10870 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"grad_norm": 1.0466020107269287, |
|
"learning_rate": 7.673135630178805e-05, |
|
"loss": 1.1958, |
|
"step": 10880 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"grad_norm": 1.0521596670150757, |
|
"learning_rate": 7.670955080680333e-05, |
|
"loss": 1.2025, |
|
"step": 10890 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"grad_norm": 0.9906710982322693, |
|
"learning_rate": 7.668774531181858e-05, |
|
"loss": 1.188, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"grad_norm": 1.1713993549346924, |
|
"learning_rate": 7.666593981683384e-05, |
|
"loss": 1.1992, |
|
"step": 10910 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"grad_norm": 1.009819507598877, |
|
"learning_rate": 7.664413432184911e-05, |
|
"loss": 1.191, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"grad_norm": 1.0150312185287476, |
|
"learning_rate": 7.662232882686438e-05, |
|
"loss": 1.1951, |
|
"step": 10930 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"grad_norm": 0.9645649790763855, |
|
"learning_rate": 7.660052333187964e-05, |
|
"loss": 1.1941, |
|
"step": 10940 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"grad_norm": 1.0158168077468872, |
|
"learning_rate": 7.65787178368949e-05, |
|
"loss": 1.1911, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"grad_norm": 1.0730938911437988, |
|
"learning_rate": 7.655691234191017e-05, |
|
"loss": 1.1885, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"grad_norm": 1.09099543094635, |
|
"learning_rate": 7.653510684692543e-05, |
|
"loss": 1.195, |
|
"step": 10970 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"grad_norm": 0.982562243938446, |
|
"learning_rate": 7.651330135194069e-05, |
|
"loss": 1.213, |
|
"step": 10980 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"grad_norm": 1.0173815488815308, |
|
"learning_rate": 7.649149585695596e-05, |
|
"loss": 1.1931, |
|
"step": 10990 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"grad_norm": 1.0644387006759644, |
|
"learning_rate": 7.646969036197122e-05, |
|
"loss": 1.2, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"grad_norm": 1.0456851720809937, |
|
"learning_rate": 7.644788486698648e-05, |
|
"loss": 1.2267, |
|
"step": 11010 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"grad_norm": 1.0387489795684814, |
|
"learning_rate": 7.642607937200175e-05, |
|
"loss": 1.1818, |
|
"step": 11020 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"grad_norm": 1.034599781036377, |
|
"learning_rate": 7.640427387701701e-05, |
|
"loss": 1.1972, |
|
"step": 11030 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 1.005964994430542, |
|
"learning_rate": 7.638246838203228e-05, |
|
"loss": 1.1882, |
|
"step": 11040 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 1.0190836191177368, |
|
"learning_rate": 7.636066288704753e-05, |
|
"loss": 1.1819, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 1.010334849357605, |
|
"learning_rate": 7.63388573920628e-05, |
|
"loss": 1.2054, |
|
"step": 11060 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 0.986047089099884, |
|
"learning_rate": 7.631705189707806e-05, |
|
"loss": 1.1831, |
|
"step": 11070 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"grad_norm": 1.0715646743774414, |
|
"learning_rate": 7.629524640209334e-05, |
|
"loss": 1.2143, |
|
"step": 11080 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"grad_norm": 1.0573137998580933, |
|
"learning_rate": 7.62734409071086e-05, |
|
"loss": 1.1765, |
|
"step": 11090 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"grad_norm": 0.9830726385116577, |
|
"learning_rate": 7.625163541212386e-05, |
|
"loss": 1.2195, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"grad_norm": 0.9928615689277649, |
|
"learning_rate": 7.622982991713911e-05, |
|
"loss": 1.2052, |
|
"step": 11110 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"grad_norm": 0.916532039642334, |
|
"learning_rate": 7.620802442215439e-05, |
|
"loss": 1.2161, |
|
"step": 11120 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"grad_norm": 1.024786353111267, |
|
"learning_rate": 7.618621892716965e-05, |
|
"loss": 1.1841, |
|
"step": 11130 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"grad_norm": 0.9942538142204285, |
|
"learning_rate": 7.616441343218491e-05, |
|
"loss": 1.1969, |
|
"step": 11140 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"grad_norm": 0.9637119770050049, |
|
"learning_rate": 7.614260793720018e-05, |
|
"loss": 1.1839, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"grad_norm": 1.0759954452514648, |
|
"learning_rate": 7.612080244221544e-05, |
|
"loss": 1.2087, |
|
"step": 11160 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"grad_norm": 1.1083338260650635, |
|
"learning_rate": 7.60989969472307e-05, |
|
"loss": 1.1637, |
|
"step": 11170 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"grad_norm": 0.9280533790588379, |
|
"learning_rate": 7.607719145224597e-05, |
|
"loss": 1.186, |
|
"step": 11180 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"grad_norm": 1.005856990814209, |
|
"learning_rate": 7.605538595726124e-05, |
|
"loss": 1.2096, |
|
"step": 11190 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"grad_norm": 1.0294781923294067, |
|
"learning_rate": 7.603358046227649e-05, |
|
"loss": 1.1933, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"grad_norm": 1.129011631011963, |
|
"learning_rate": 7.601177496729176e-05, |
|
"loss": 1.1975, |
|
"step": 11210 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"grad_norm": 0.9473848938941956, |
|
"learning_rate": 7.598996947230702e-05, |
|
"loss": 1.191, |
|
"step": 11220 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"grad_norm": 1.0725443363189697, |
|
"learning_rate": 7.59681639773223e-05, |
|
"loss": 1.2069, |
|
"step": 11230 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"grad_norm": 1.0083664655685425, |
|
"learning_rate": 7.594635848233756e-05, |
|
"loss": 1.2012, |
|
"step": 11240 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"grad_norm": 1.0504008531570435, |
|
"learning_rate": 7.592455298735281e-05, |
|
"loss": 1.1897, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"grad_norm": 1.02128267288208, |
|
"learning_rate": 7.590274749236807e-05, |
|
"loss": 1.193, |
|
"step": 11260 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"grad_norm": 1.043655276298523, |
|
"learning_rate": 7.588094199738335e-05, |
|
"loss": 1.1984, |
|
"step": 11270 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"grad_norm": 1.0775086879730225, |
|
"learning_rate": 7.585913650239861e-05, |
|
"loss": 1.1826, |
|
"step": 11280 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"grad_norm": 1.0672656297683716, |
|
"learning_rate": 7.583733100741387e-05, |
|
"loss": 1.221, |
|
"step": 11290 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"grad_norm": 1.1105164289474487, |
|
"learning_rate": 7.581552551242914e-05, |
|
"loss": 1.2124, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"grad_norm": 0.978393018245697, |
|
"learning_rate": 7.57937200174444e-05, |
|
"loss": 1.1749, |
|
"step": 11310 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"grad_norm": 1.0011403560638428, |
|
"learning_rate": 7.577191452245966e-05, |
|
"loss": 1.1987, |
|
"step": 11320 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"grad_norm": 0.9928615093231201, |
|
"learning_rate": 7.575010902747493e-05, |
|
"loss": 1.1916, |
|
"step": 11330 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"grad_norm": 0.9368339776992798, |
|
"learning_rate": 7.572830353249019e-05, |
|
"loss": 1.2155, |
|
"step": 11340 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"grad_norm": 1.0176599025726318, |
|
"learning_rate": 7.570649803750545e-05, |
|
"loss": 1.2108, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"grad_norm": 0.956798255443573, |
|
"learning_rate": 7.568469254252072e-05, |
|
"loss": 1.1951, |
|
"step": 11360 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"grad_norm": 0.9456045627593994, |
|
"learning_rate": 7.566288704753598e-05, |
|
"loss": 1.1939, |
|
"step": 11370 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"grad_norm": 1.1099495887756348, |
|
"learning_rate": 7.564108155255125e-05, |
|
"loss": 1.2113, |
|
"step": 11380 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"grad_norm": 1.0258333683013916, |
|
"learning_rate": 7.561927605756652e-05, |
|
"loss": 1.1723, |
|
"step": 11390 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"grad_norm": 1.0410195589065552, |
|
"learning_rate": 7.559747056258177e-05, |
|
"loss": 1.182, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"grad_norm": 0.9671265482902527, |
|
"learning_rate": 7.557566506759703e-05, |
|
"loss": 1.2038, |
|
"step": 11410 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"grad_norm": 0.9647257328033447, |
|
"learning_rate": 7.555385957261231e-05, |
|
"loss": 1.2078, |
|
"step": 11420 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"grad_norm": 1.0497002601623535, |
|
"learning_rate": 7.553205407762757e-05, |
|
"loss": 1.2053, |
|
"step": 11430 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"grad_norm": 1.080557107925415, |
|
"learning_rate": 7.551024858264283e-05, |
|
"loss": 1.1925, |
|
"step": 11440 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"grad_norm": 0.967833936214447, |
|
"learning_rate": 7.54884430876581e-05, |
|
"loss": 1.2106, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"grad_norm": 1.1252259016036987, |
|
"learning_rate": 7.546663759267336e-05, |
|
"loss": 1.2035, |
|
"step": 11460 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"grad_norm": 1.021498203277588, |
|
"learning_rate": 7.544483209768862e-05, |
|
"loss": 1.1748, |
|
"step": 11470 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"grad_norm": 1.1426560878753662, |
|
"learning_rate": 7.542302660270389e-05, |
|
"loss": 1.1916, |
|
"step": 11480 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"grad_norm": 0.9883751273155212, |
|
"learning_rate": 7.540122110771915e-05, |
|
"loss": 1.1808, |
|
"step": 11490 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 0.9893055558204651, |
|
"learning_rate": 7.537941561273441e-05, |
|
"loss": 1.1961, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 1.038801908493042, |
|
"learning_rate": 7.535761011774967e-05, |
|
"loss": 1.1813, |
|
"step": 11510 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 0.9812270998954773, |
|
"learning_rate": 7.533580462276494e-05, |
|
"loss": 1.1873, |
|
"step": 11520 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 1.0793439149856567, |
|
"learning_rate": 7.53139991277802e-05, |
|
"loss": 1.1858, |
|
"step": 11530 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"grad_norm": 1.0743041038513184, |
|
"learning_rate": 7.529219363279548e-05, |
|
"loss": 1.1788, |
|
"step": 11540 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"grad_norm": 1.1196831464767456, |
|
"learning_rate": 7.527038813781073e-05, |
|
"loss": 1.2059, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"grad_norm": 1.0126169919967651, |
|
"learning_rate": 7.524858264282599e-05, |
|
"loss": 1.2101, |
|
"step": 11560 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"grad_norm": 1.2069376707077026, |
|
"learning_rate": 7.522677714784125e-05, |
|
"loss": 1.1964, |
|
"step": 11570 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"grad_norm": 0.9865954518318176, |
|
"learning_rate": 7.520497165285653e-05, |
|
"loss": 1.1966, |
|
"step": 11580 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"grad_norm": 0.9862752556800842, |
|
"learning_rate": 7.518316615787179e-05, |
|
"loss": 1.1954, |
|
"step": 11590 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"grad_norm": 1.093674659729004, |
|
"learning_rate": 7.516136066288704e-05, |
|
"loss": 1.1931, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"grad_norm": 1.0402370691299438, |
|
"learning_rate": 7.513955516790232e-05, |
|
"loss": 1.1834, |
|
"step": 11610 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"grad_norm": 0.9660056233406067, |
|
"learning_rate": 7.511774967291758e-05, |
|
"loss": 1.1978, |
|
"step": 11620 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"grad_norm": 1.1045291423797607, |
|
"learning_rate": 7.509594417793284e-05, |
|
"loss": 1.1789, |
|
"step": 11630 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"grad_norm": 1.1806862354278564, |
|
"learning_rate": 7.507413868294811e-05, |
|
"loss": 1.1849, |
|
"step": 11640 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"grad_norm": 1.0600950717926025, |
|
"learning_rate": 7.505233318796337e-05, |
|
"loss": 1.1863, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"grad_norm": 1.2518783807754517, |
|
"learning_rate": 7.503052769297863e-05, |
|
"loss": 1.1911, |
|
"step": 11660 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"grad_norm": 1.0559264421463013, |
|
"learning_rate": 7.50087221979939e-05, |
|
"loss": 1.2106, |
|
"step": 11670 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"grad_norm": 0.9558138847351074, |
|
"learning_rate": 7.498691670300916e-05, |
|
"loss": 1.1719, |
|
"step": 11680 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"grad_norm": 1.0867066383361816, |
|
"learning_rate": 7.496511120802444e-05, |
|
"loss": 1.2209, |
|
"step": 11690 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"grad_norm": 0.9424611926078796, |
|
"learning_rate": 7.494330571303969e-05, |
|
"loss": 1.1812, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"grad_norm": 1.04227614402771, |
|
"learning_rate": 7.492150021805495e-05, |
|
"loss": 1.204, |
|
"step": 11710 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"grad_norm": 0.9230485558509827, |
|
"learning_rate": 7.489969472307021e-05, |
|
"loss": 1.1923, |
|
"step": 11720 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"grad_norm": 1.079827070236206, |
|
"learning_rate": 7.487788922808549e-05, |
|
"loss": 1.1633, |
|
"step": 11730 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"grad_norm": 1.0158615112304688, |
|
"learning_rate": 7.485608373310075e-05, |
|
"loss": 1.1828, |
|
"step": 11740 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"grad_norm": 1.0298587083816528, |
|
"learning_rate": 7.4834278238116e-05, |
|
"loss": 1.2046, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"grad_norm": 1.1021103858947754, |
|
"learning_rate": 7.481247274313126e-05, |
|
"loss": 1.2369, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"grad_norm": 1.0776439905166626, |
|
"learning_rate": 7.479066724814654e-05, |
|
"loss": 1.1884, |
|
"step": 11770 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"grad_norm": 1.0745654106140137, |
|
"learning_rate": 7.47688617531618e-05, |
|
"loss": 1.1915, |
|
"step": 11780 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"grad_norm": 0.9988030791282654, |
|
"learning_rate": 7.474705625817707e-05, |
|
"loss": 1.1783, |
|
"step": 11790 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"grad_norm": 0.9837521910667419, |
|
"learning_rate": 7.472525076319233e-05, |
|
"loss": 1.1859, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"grad_norm": 1.076101541519165, |
|
"learning_rate": 7.470344526820759e-05, |
|
"loss": 1.194, |
|
"step": 11810 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"grad_norm": 1.0141769647598267, |
|
"learning_rate": 7.468163977322286e-05, |
|
"loss": 1.1893, |
|
"step": 11820 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"grad_norm": 0.9962597489356995, |
|
"learning_rate": 7.465983427823812e-05, |
|
"loss": 1.2143, |
|
"step": 11830 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"grad_norm": 1.0923272371292114, |
|
"learning_rate": 7.46380287832534e-05, |
|
"loss": 1.184, |
|
"step": 11840 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"grad_norm": 1.1431857347488403, |
|
"learning_rate": 7.461622328826864e-05, |
|
"loss": 1.1926, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"grad_norm": 1.0489574670791626, |
|
"learning_rate": 7.459441779328391e-05, |
|
"loss": 1.1584, |
|
"step": 11860 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"grad_norm": 1.049176812171936, |
|
"learning_rate": 7.457261229829917e-05, |
|
"loss": 1.2145, |
|
"step": 11870 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"grad_norm": 1.0617070198059082, |
|
"learning_rate": 7.455080680331445e-05, |
|
"loss": 1.1821, |
|
"step": 11880 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"grad_norm": 1.1978720426559448, |
|
"learning_rate": 7.452900130832971e-05, |
|
"loss": 1.1832, |
|
"step": 11890 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"grad_norm": 1.0322489738464355, |
|
"learning_rate": 7.450719581334496e-05, |
|
"loss": 1.1978, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"grad_norm": 1.0497206449508667, |
|
"learning_rate": 7.448539031836022e-05, |
|
"loss": 1.1771, |
|
"step": 11910 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"grad_norm": 1.0136041641235352, |
|
"learning_rate": 7.44635848233755e-05, |
|
"loss": 1.198, |
|
"step": 11920 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"grad_norm": 1.0500036478042603, |
|
"learning_rate": 7.444177932839076e-05, |
|
"loss": 1.2019, |
|
"step": 11930 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"grad_norm": 1.0009404420852661, |
|
"learning_rate": 7.441997383340603e-05, |
|
"loss": 1.197, |
|
"step": 11940 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"grad_norm": 1.1604543924331665, |
|
"learning_rate": 7.439816833842127e-05, |
|
"loss": 1.1921, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"grad_norm": 1.0473634004592896, |
|
"learning_rate": 7.437636284343655e-05, |
|
"loss": 1.1718, |
|
"step": 11960 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"grad_norm": 1.0517455339431763, |
|
"learning_rate": 7.435455734845181e-05, |
|
"loss": 1.1721, |
|
"step": 11970 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"grad_norm": 1.0030772686004639, |
|
"learning_rate": 7.433275185346708e-05, |
|
"loss": 1.1942, |
|
"step": 11980 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"grad_norm": 1.067175269126892, |
|
"learning_rate": 7.431094635848234e-05, |
|
"loss": 1.2015, |
|
"step": 11990 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"grad_norm": 1.0570900440216064, |
|
"learning_rate": 7.42891408634976e-05, |
|
"loss": 1.1715, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"grad_norm": 1.0768860578536987, |
|
"learning_rate": 7.426733536851287e-05, |
|
"loss": 1.2118, |
|
"step": 12010 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"grad_norm": 0.9864534139633179, |
|
"learning_rate": 7.424552987352813e-05, |
|
"loss": 1.211, |
|
"step": 12020 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"grad_norm": 0.9961116909980774, |
|
"learning_rate": 7.422372437854339e-05, |
|
"loss": 1.1726, |
|
"step": 12030 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"grad_norm": 1.149584174156189, |
|
"learning_rate": 7.420191888355867e-05, |
|
"loss": 1.2015, |
|
"step": 12040 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"grad_norm": 0.9385210275650024, |
|
"learning_rate": 7.418011338857392e-05, |
|
"loss": 1.1853, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"grad_norm": 0.9972238540649414, |
|
"learning_rate": 7.415830789358918e-05, |
|
"loss": 1.1862, |
|
"step": 12060 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"grad_norm": 1.1037793159484863, |
|
"learning_rate": 7.413650239860446e-05, |
|
"loss": 1.2191, |
|
"step": 12070 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"grad_norm": 1.082542896270752, |
|
"learning_rate": 7.411469690361972e-05, |
|
"loss": 1.2079, |
|
"step": 12080 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"grad_norm": 1.103800892829895, |
|
"learning_rate": 7.409289140863498e-05, |
|
"loss": 1.2069, |
|
"step": 12090 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"grad_norm": 1.1348109245300293, |
|
"learning_rate": 7.407108591365023e-05, |
|
"loss": 1.1853, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"grad_norm": 1.0272557735443115, |
|
"learning_rate": 7.404928041866551e-05, |
|
"loss": 1.206, |
|
"step": 12110 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"grad_norm": 1.06856369972229, |
|
"learning_rate": 7.402747492368077e-05, |
|
"loss": 1.2077, |
|
"step": 12120 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"grad_norm": 0.9664187431335449, |
|
"learning_rate": 7.400566942869604e-05, |
|
"loss": 1.2252, |
|
"step": 12130 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"grad_norm": 1.0753014087677002, |
|
"learning_rate": 7.39838639337113e-05, |
|
"loss": 1.2033, |
|
"step": 12140 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"grad_norm": 1.1803292036056519, |
|
"learning_rate": 7.396205843872656e-05, |
|
"loss": 1.1944, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"grad_norm": 0.9899237155914307, |
|
"learning_rate": 7.394025294374183e-05, |
|
"loss": 1.1768, |
|
"step": 12160 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"grad_norm": 1.0693211555480957, |
|
"learning_rate": 7.391844744875709e-05, |
|
"loss": 1.1957, |
|
"step": 12170 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"grad_norm": 1.0212500095367432, |
|
"learning_rate": 7.389664195377235e-05, |
|
"loss": 1.1807, |
|
"step": 12180 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"grad_norm": 0.9626917839050293, |
|
"learning_rate": 7.387483645878763e-05, |
|
"loss": 1.2019, |
|
"step": 12190 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"grad_norm": 1.0324492454528809, |
|
"learning_rate": 7.385303096380288e-05, |
|
"loss": 1.1787, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"grad_norm": 1.0183689594268799, |
|
"learning_rate": 7.383122546881814e-05, |
|
"loss": 1.1718, |
|
"step": 12210 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"grad_norm": 1.03179132938385, |
|
"learning_rate": 7.38094199738334e-05, |
|
"loss": 1.1684, |
|
"step": 12220 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"grad_norm": 1.0151221752166748, |
|
"learning_rate": 7.378761447884868e-05, |
|
"loss": 1.1754, |
|
"step": 12230 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"grad_norm": 1.0675002336502075, |
|
"learning_rate": 7.376580898386394e-05, |
|
"loss": 1.1964, |
|
"step": 12240 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"grad_norm": 0.9424752593040466, |
|
"learning_rate": 7.374400348887919e-05, |
|
"loss": 1.1994, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"grad_norm": 1.0181151628494263, |
|
"learning_rate": 7.372219799389446e-05, |
|
"loss": 1.1943, |
|
"step": 12260 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"grad_norm": 1.0865308046340942, |
|
"learning_rate": 7.370039249890973e-05, |
|
"loss": 1.1703, |
|
"step": 12270 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"grad_norm": 1.043016791343689, |
|
"learning_rate": 7.3678587003925e-05, |
|
"loss": 1.1813, |
|
"step": 12280 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"grad_norm": 1.060164213180542, |
|
"learning_rate": 7.365678150894026e-05, |
|
"loss": 1.1769, |
|
"step": 12290 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"grad_norm": 1.0264476537704468, |
|
"learning_rate": 7.363497601395552e-05, |
|
"loss": 1.1895, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"grad_norm": 1.0359675884246826, |
|
"learning_rate": 7.361317051897078e-05, |
|
"loss": 1.1773, |
|
"step": 12310 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"grad_norm": 1.0558348894119263, |
|
"learning_rate": 7.359136502398605e-05, |
|
"loss": 1.2011, |
|
"step": 12320 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"grad_norm": 1.0487242937088013, |
|
"learning_rate": 7.356955952900131e-05, |
|
"loss": 1.2145, |
|
"step": 12330 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"grad_norm": 1.0390251874923706, |
|
"learning_rate": 7.354775403401657e-05, |
|
"loss": 1.1771, |
|
"step": 12340 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"grad_norm": 0.9608905911445618, |
|
"learning_rate": 7.352594853903184e-05, |
|
"loss": 1.1988, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"grad_norm": 0.9924561977386475, |
|
"learning_rate": 7.35041430440471e-05, |
|
"loss": 1.2049, |
|
"step": 12360 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"grad_norm": 0.9115813970565796, |
|
"learning_rate": 7.348233754906236e-05, |
|
"loss": 1.185, |
|
"step": 12370 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"grad_norm": 0.9227597713470459, |
|
"learning_rate": 7.346053205407764e-05, |
|
"loss": 1.1964, |
|
"step": 12380 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"grad_norm": 1.1192283630371094, |
|
"learning_rate": 7.34387265590929e-05, |
|
"loss": 1.1927, |
|
"step": 12390 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"grad_norm": 0.9770265817642212, |
|
"learning_rate": 7.341692106410815e-05, |
|
"loss": 1.197, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"grad_norm": 1.0701338052749634, |
|
"learning_rate": 7.339511556912341e-05, |
|
"loss": 1.1834, |
|
"step": 12410 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"grad_norm": 1.0348602533340454, |
|
"learning_rate": 7.337331007413869e-05, |
|
"loss": 1.2115, |
|
"step": 12420 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"grad_norm": 1.0927150249481201, |
|
"learning_rate": 7.335150457915395e-05, |
|
"loss": 1.2032, |
|
"step": 12430 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"grad_norm": 1.0548428297042847, |
|
"learning_rate": 7.332969908416922e-05, |
|
"loss": 1.1962, |
|
"step": 12440 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"grad_norm": 0.9672625064849854, |
|
"learning_rate": 7.330789358918447e-05, |
|
"loss": 1.1761, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"grad_norm": 0.9257100820541382, |
|
"learning_rate": 7.328608809419974e-05, |
|
"loss": 1.2007, |
|
"step": 12460 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"grad_norm": 1.0286579132080078, |
|
"learning_rate": 7.3264282599215e-05, |
|
"loss": 1.1988, |
|
"step": 12470 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"grad_norm": 1.153806447982788, |
|
"learning_rate": 7.324247710423027e-05, |
|
"loss": 1.207, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"grad_norm": 0.9337689876556396, |
|
"learning_rate": 7.322067160924553e-05, |
|
"loss": 1.2006, |
|
"step": 12490 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"grad_norm": 0.9721220135688782, |
|
"learning_rate": 7.31988661142608e-05, |
|
"loss": 1.2014, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"grad_norm": 1.158456802368164, |
|
"learning_rate": 7.317706061927606e-05, |
|
"loss": 1.2074, |
|
"step": 12510 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"grad_norm": 1.0969914197921753, |
|
"learning_rate": 7.315525512429132e-05, |
|
"loss": 1.207, |
|
"step": 12520 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"grad_norm": 0.9585858583450317, |
|
"learning_rate": 7.31334496293066e-05, |
|
"loss": 1.1783, |
|
"step": 12530 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"grad_norm": 1.0447596311569214, |
|
"learning_rate": 7.311164413432186e-05, |
|
"loss": 1.1662, |
|
"step": 12540 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"grad_norm": 1.0252220630645752, |
|
"learning_rate": 7.308983863933711e-05, |
|
"loss": 1.1891, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"grad_norm": 1.075294017791748, |
|
"learning_rate": 7.306803314435237e-05, |
|
"loss": 1.1917, |
|
"step": 12560 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"grad_norm": 1.0980489253997803, |
|
"learning_rate": 7.304622764936765e-05, |
|
"loss": 1.1829, |
|
"step": 12570 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"grad_norm": 1.0682340860366821, |
|
"learning_rate": 7.302442215438291e-05, |
|
"loss": 1.1859, |
|
"step": 12580 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"grad_norm": 1.0863393545150757, |
|
"learning_rate": 7.300261665939818e-05, |
|
"loss": 1.188, |
|
"step": 12590 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"grad_norm": 1.0569467544555664, |
|
"learning_rate": 7.298081116441343e-05, |
|
"loss": 1.1962, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"grad_norm": 1.0733450651168823, |
|
"learning_rate": 7.29590056694287e-05, |
|
"loss": 1.1934, |
|
"step": 12610 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"grad_norm": 1.0762420892715454, |
|
"learning_rate": 7.293720017444397e-05, |
|
"loss": 1.181, |
|
"step": 12620 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"grad_norm": 1.0010732412338257, |
|
"learning_rate": 7.291539467945923e-05, |
|
"loss": 1.1936, |
|
"step": 12630 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"grad_norm": 1.039819598197937, |
|
"learning_rate": 7.289358918447449e-05, |
|
"loss": 1.2001, |
|
"step": 12640 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"grad_norm": 1.1060088872909546, |
|
"learning_rate": 7.287178368948975e-05, |
|
"loss": 1.2056, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"grad_norm": 0.9314666986465454, |
|
"learning_rate": 7.284997819450502e-05, |
|
"loss": 1.1748, |
|
"step": 12660 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"grad_norm": 1.2504175901412964, |
|
"learning_rate": 7.282817269952028e-05, |
|
"loss": 1.1737, |
|
"step": 12670 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"grad_norm": 1.1391412019729614, |
|
"learning_rate": 7.280636720453554e-05, |
|
"loss": 1.1909, |
|
"step": 12680 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"grad_norm": 1.0052971839904785, |
|
"learning_rate": 7.278456170955081e-05, |
|
"loss": 1.1902, |
|
"step": 12690 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"grad_norm": 1.1059855222702026, |
|
"learning_rate": 7.276275621456607e-05, |
|
"loss": 1.2021, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"grad_norm": 1.0115567445755005, |
|
"learning_rate": 7.274095071958133e-05, |
|
"loss": 1.1512, |
|
"step": 12710 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"grad_norm": 1.0905554294586182, |
|
"learning_rate": 7.27191452245966e-05, |
|
"loss": 1.1884, |
|
"step": 12720 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"grad_norm": 1.023762583732605, |
|
"learning_rate": 7.269733972961187e-05, |
|
"loss": 1.1841, |
|
"step": 12730 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"grad_norm": 1.0214531421661377, |
|
"learning_rate": 7.267553423462714e-05, |
|
"loss": 1.185, |
|
"step": 12740 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"grad_norm": 1.043494701385498, |
|
"learning_rate": 7.265372873964239e-05, |
|
"loss": 1.1822, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"grad_norm": 1.0787135362625122, |
|
"learning_rate": 7.263192324465766e-05, |
|
"loss": 1.1827, |
|
"step": 12760 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"grad_norm": 1.1063132286071777, |
|
"learning_rate": 7.261011774967292e-05, |
|
"loss": 1.1847, |
|
"step": 12770 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"grad_norm": 1.0400912761688232, |
|
"learning_rate": 7.258831225468819e-05, |
|
"loss": 1.1603, |
|
"step": 12780 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"grad_norm": 1.057569146156311, |
|
"learning_rate": 7.256650675970345e-05, |
|
"loss": 1.1713, |
|
"step": 12790 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"grad_norm": 1.0713859796524048, |
|
"learning_rate": 7.254470126471871e-05, |
|
"loss": 1.2167, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"grad_norm": 1.0643656253814697, |
|
"learning_rate": 7.252289576973398e-05, |
|
"loss": 1.1744, |
|
"step": 12810 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"grad_norm": 1.1218703985214233, |
|
"learning_rate": 7.250109027474924e-05, |
|
"loss": 1.2183, |
|
"step": 12820 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"grad_norm": 0.9932084083557129, |
|
"learning_rate": 7.24792847797645e-05, |
|
"loss": 1.1774, |
|
"step": 12830 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"grad_norm": 1.063856840133667, |
|
"learning_rate": 7.245747928477977e-05, |
|
"loss": 1.1519, |
|
"step": 12840 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"grad_norm": 1.0655205249786377, |
|
"learning_rate": 7.243567378979503e-05, |
|
"loss": 1.1883, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"grad_norm": 0.9149487018585205, |
|
"learning_rate": 7.241386829481029e-05, |
|
"loss": 1.1636, |
|
"step": 12860 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"grad_norm": 1.061606764793396, |
|
"learning_rate": 7.239206279982556e-05, |
|
"loss": 1.1933, |
|
"step": 12870 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"grad_norm": 1.026875376701355, |
|
"learning_rate": 7.237025730484083e-05, |
|
"loss": 1.1697, |
|
"step": 12880 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"grad_norm": 0.9857021570205688, |
|
"learning_rate": 7.23484518098561e-05, |
|
"loss": 1.1593, |
|
"step": 12890 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"grad_norm": 1.0682117938995361, |
|
"learning_rate": 7.232664631487134e-05, |
|
"loss": 1.1846, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"grad_norm": 0.9390698671340942, |
|
"learning_rate": 7.230484081988661e-05, |
|
"loss": 1.1625, |
|
"step": 12910 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"grad_norm": 1.0105453729629517, |
|
"learning_rate": 7.228303532490188e-05, |
|
"loss": 1.1929, |
|
"step": 12920 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"grad_norm": 0.986284077167511, |
|
"learning_rate": 7.226122982991715e-05, |
|
"loss": 1.1973, |
|
"step": 12930 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"grad_norm": 1.0369880199432373, |
|
"learning_rate": 7.223942433493241e-05, |
|
"loss": 1.1996, |
|
"step": 12940 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"grad_norm": 1.1171998977661133, |
|
"learning_rate": 7.221761883994766e-05, |
|
"loss": 1.2022, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"grad_norm": 1.0862730741500854, |
|
"learning_rate": 7.219581334496294e-05, |
|
"loss": 1.19, |
|
"step": 12960 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"grad_norm": 1.0609533786773682, |
|
"learning_rate": 7.21740078499782e-05, |
|
"loss": 1.1825, |
|
"step": 12970 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"grad_norm": 0.98408043384552, |
|
"learning_rate": 7.215220235499346e-05, |
|
"loss": 1.1766, |
|
"step": 12980 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"grad_norm": 1.0378422737121582, |
|
"learning_rate": 7.213039686000873e-05, |
|
"loss": 1.1843, |
|
"step": 12990 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"grad_norm": 0.9478686451911926, |
|
"learning_rate": 7.210859136502399e-05, |
|
"loss": 1.1728, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"grad_norm": 1.0276613235473633, |
|
"learning_rate": 7.208678587003925e-05, |
|
"loss": 1.1796, |
|
"step": 13010 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"grad_norm": 0.9244964122772217, |
|
"learning_rate": 7.206498037505451e-05, |
|
"loss": 1.1812, |
|
"step": 13020 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"grad_norm": 1.0720821619033813, |
|
"learning_rate": 7.204317488006979e-05, |
|
"loss": 1.1597, |
|
"step": 13030 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"grad_norm": 1.0820330381393433, |
|
"learning_rate": 7.202136938508504e-05, |
|
"loss": 1.1981, |
|
"step": 13040 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"grad_norm": 0.9590197205543518, |
|
"learning_rate": 7.19995638901003e-05, |
|
"loss": 1.1898, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"grad_norm": 1.0559465885162354, |
|
"learning_rate": 7.197775839511557e-05, |
|
"loss": 1.1985, |
|
"step": 13060 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"grad_norm": 0.9392025470733643, |
|
"learning_rate": 7.195595290013084e-05, |
|
"loss": 1.1933, |
|
"step": 13070 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"grad_norm": 1.1029566526412964, |
|
"learning_rate": 7.19341474051461e-05, |
|
"loss": 1.1733, |
|
"step": 13080 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"grad_norm": 1.0255013704299927, |
|
"learning_rate": 7.191234191016137e-05, |
|
"loss": 1.1762, |
|
"step": 13090 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"grad_norm": 1.0394928455352783, |
|
"learning_rate": 7.189053641517662e-05, |
|
"loss": 1.151, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"grad_norm": 1.057391881942749, |
|
"learning_rate": 7.18687309201919e-05, |
|
"loss": 1.1761, |
|
"step": 13110 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"grad_norm": 1.0358378887176514, |
|
"learning_rate": 7.184692542520716e-05, |
|
"loss": 1.1911, |
|
"step": 13120 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"grad_norm": 1.0503947734832764, |
|
"learning_rate": 7.182511993022242e-05, |
|
"loss": 1.2198, |
|
"step": 13130 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"grad_norm": 1.0237114429473877, |
|
"learning_rate": 7.180331443523768e-05, |
|
"loss": 1.2043, |
|
"step": 13140 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"grad_norm": 0.9386830925941467, |
|
"learning_rate": 7.178150894025295e-05, |
|
"loss": 1.192, |
|
"step": 13150 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"grad_norm": 0.9386530518531799, |
|
"learning_rate": 7.175970344526821e-05, |
|
"loss": 1.1864, |
|
"step": 13160 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"grad_norm": 0.9574694633483887, |
|
"learning_rate": 7.173789795028347e-05, |
|
"loss": 1.1828, |
|
"step": 13170 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"grad_norm": 1.0528520345687866, |
|
"learning_rate": 7.171609245529874e-05, |
|
"loss": 1.1861, |
|
"step": 13180 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"grad_norm": 1.0283684730529785, |
|
"learning_rate": 7.1694286960314e-05, |
|
"loss": 1.1749, |
|
"step": 13190 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"grad_norm": 0.9847733974456787, |
|
"learning_rate": 7.167248146532926e-05, |
|
"loss": 1.1903, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"grad_norm": 1.0302000045776367, |
|
"learning_rate": 7.165067597034453e-05, |
|
"loss": 1.1852, |
|
"step": 13210 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"grad_norm": 1.0097705125808716, |
|
"learning_rate": 7.16288704753598e-05, |
|
"loss": 1.1874, |
|
"step": 13220 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"grad_norm": 1.1593202352523804, |
|
"learning_rate": 7.160706498037506e-05, |
|
"loss": 1.1827, |
|
"step": 13230 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"grad_norm": 0.9892207384109497, |
|
"learning_rate": 7.158525948539033e-05, |
|
"loss": 1.1694, |
|
"step": 13240 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"grad_norm": 1.0846501588821411, |
|
"learning_rate": 7.156345399040558e-05, |
|
"loss": 1.1892, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"grad_norm": 1.014400601387024, |
|
"learning_rate": 7.154164849542085e-05, |
|
"loss": 1.1806, |
|
"step": 13260 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"grad_norm": 1.0073882341384888, |
|
"learning_rate": 7.151984300043612e-05, |
|
"loss": 1.1781, |
|
"step": 13270 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"grad_norm": 1.2205009460449219, |
|
"learning_rate": 7.149803750545138e-05, |
|
"loss": 1.1757, |
|
"step": 13280 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"grad_norm": 1.058864951133728, |
|
"learning_rate": 7.147623201046664e-05, |
|
"loss": 1.1968, |
|
"step": 13290 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"grad_norm": 1.0327656269073486, |
|
"learning_rate": 7.14544265154819e-05, |
|
"loss": 1.2232, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"grad_norm": 1.042557954788208, |
|
"learning_rate": 7.143262102049717e-05, |
|
"loss": 1.2254, |
|
"step": 13310 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"grad_norm": 0.9692584276199341, |
|
"learning_rate": 7.141081552551243e-05, |
|
"loss": 1.1757, |
|
"step": 13320 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"grad_norm": 1.0381295680999756, |
|
"learning_rate": 7.13890100305277e-05, |
|
"loss": 1.1961, |
|
"step": 13330 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"grad_norm": 1.0239328145980835, |
|
"learning_rate": 7.136720453554296e-05, |
|
"loss": 1.155, |
|
"step": 13340 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"grad_norm": 1.0357582569122314, |
|
"learning_rate": 7.134539904055822e-05, |
|
"loss": 1.1719, |
|
"step": 13350 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"grad_norm": 1.0303056240081787, |
|
"learning_rate": 7.132359354557348e-05, |
|
"loss": 1.1934, |
|
"step": 13360 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"grad_norm": 0.9931465983390808, |
|
"learning_rate": 7.130178805058875e-05, |
|
"loss": 1.1791, |
|
"step": 13370 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"grad_norm": 1.0507264137268066, |
|
"learning_rate": 7.127998255560402e-05, |
|
"loss": 1.184, |
|
"step": 13380 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"grad_norm": 1.0703891515731812, |
|
"learning_rate": 7.125817706061927e-05, |
|
"loss": 1.1853, |
|
"step": 13390 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"grad_norm": 0.9957337975502014, |
|
"learning_rate": 7.123637156563454e-05, |
|
"loss": 1.1702, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"grad_norm": 1.1027911901474, |
|
"learning_rate": 7.12145660706498e-05, |
|
"loss": 1.1968, |
|
"step": 13410 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"grad_norm": 0.9877254366874695, |
|
"learning_rate": 7.119276057566508e-05, |
|
"loss": 1.1752, |
|
"step": 13420 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"grad_norm": 1.0115269422531128, |
|
"learning_rate": 7.117095508068034e-05, |
|
"loss": 1.1546, |
|
"step": 13430 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"grad_norm": 0.9738414287567139, |
|
"learning_rate": 7.11491495856956e-05, |
|
"loss": 1.1576, |
|
"step": 13440 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"grad_norm": 1.0419977903366089, |
|
"learning_rate": 7.112734409071087e-05, |
|
"loss": 1.1927, |
|
"step": 13450 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"grad_norm": 1.0933623313903809, |
|
"learning_rate": 7.110553859572613e-05, |
|
"loss": 1.1747, |
|
"step": 13460 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"grad_norm": 1.0882395505905151, |
|
"learning_rate": 7.108373310074139e-05, |
|
"loss": 1.189, |
|
"step": 13470 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"grad_norm": 0.9442359209060669, |
|
"learning_rate": 7.106192760575665e-05, |
|
"loss": 1.1826, |
|
"step": 13480 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"grad_norm": 1.0601658821105957, |
|
"learning_rate": 7.104012211077192e-05, |
|
"loss": 1.1854, |
|
"step": 13490 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"grad_norm": 1.0670174360275269, |
|
"learning_rate": 7.101831661578718e-05, |
|
"loss": 1.1893, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"grad_norm": 1.0757992267608643, |
|
"learning_rate": 7.099651112080244e-05, |
|
"loss": 1.1984, |
|
"step": 13510 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"grad_norm": 1.0340900421142578, |
|
"learning_rate": 7.09747056258177e-05, |
|
"loss": 1.2068, |
|
"step": 13520 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"grad_norm": 1.0402545928955078, |
|
"learning_rate": 7.095290013083298e-05, |
|
"loss": 1.208, |
|
"step": 13530 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"grad_norm": 1.1371444463729858, |
|
"learning_rate": 7.093109463584823e-05, |
|
"loss": 1.1883, |
|
"step": 13540 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"grad_norm": 1.0464153289794922, |
|
"learning_rate": 7.09092891408635e-05, |
|
"loss": 1.1896, |
|
"step": 13550 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"grad_norm": 0.9860671758651733, |
|
"learning_rate": 7.088748364587876e-05, |
|
"loss": 1.1782, |
|
"step": 13560 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"grad_norm": 0.927305281162262, |
|
"learning_rate": 7.086567815089404e-05, |
|
"loss": 1.1759, |
|
"step": 13570 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"grad_norm": 1.0116522312164307, |
|
"learning_rate": 7.08438726559093e-05, |
|
"loss": 1.1845, |
|
"step": 13580 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"grad_norm": 1.0394808053970337, |
|
"learning_rate": 7.082206716092456e-05, |
|
"loss": 1.1949, |
|
"step": 13590 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"grad_norm": 1.1558239459991455, |
|
"learning_rate": 7.080026166593981e-05, |
|
"loss": 1.1758, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"grad_norm": 0.9348282217979431, |
|
"learning_rate": 7.077845617095509e-05, |
|
"loss": 1.1976, |
|
"step": 13610 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"grad_norm": 0.9124108552932739, |
|
"learning_rate": 7.075665067597035e-05, |
|
"loss": 1.172, |
|
"step": 13620 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"grad_norm": 1.077690839767456, |
|
"learning_rate": 7.073484518098561e-05, |
|
"loss": 1.1835, |
|
"step": 13630 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"grad_norm": 0.9495044350624084, |
|
"learning_rate": 7.071303968600088e-05, |
|
"loss": 1.1682, |
|
"step": 13640 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"grad_norm": 0.9947417378425598, |
|
"learning_rate": 7.069123419101614e-05, |
|
"loss": 1.2216, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"grad_norm": 1.072772741317749, |
|
"learning_rate": 7.06694286960314e-05, |
|
"loss": 1.2006, |
|
"step": 13660 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"grad_norm": 1.0669934749603271, |
|
"learning_rate": 7.064762320104667e-05, |
|
"loss": 1.1992, |
|
"step": 13670 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"grad_norm": 1.0894432067871094, |
|
"learning_rate": 7.062581770606194e-05, |
|
"loss": 1.1745, |
|
"step": 13680 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"grad_norm": 0.9627017378807068, |
|
"learning_rate": 7.060401221107719e-05, |
|
"loss": 1.1818, |
|
"step": 13690 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"grad_norm": 0.9909853935241699, |
|
"learning_rate": 7.058220671609245e-05, |
|
"loss": 1.1705, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"grad_norm": 1.0125415325164795, |
|
"learning_rate": 7.056040122110772e-05, |
|
"loss": 1.211, |
|
"step": 13710 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"grad_norm": 0.9729527235031128, |
|
"learning_rate": 7.0538595726123e-05, |
|
"loss": 1.1658, |
|
"step": 13720 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"grad_norm": 1.0256701707839966, |
|
"learning_rate": 7.051679023113826e-05, |
|
"loss": 1.1657, |
|
"step": 13730 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"grad_norm": 1.0687954425811768, |
|
"learning_rate": 7.04949847361535e-05, |
|
"loss": 1.1648, |
|
"step": 13740 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"grad_norm": 0.9713466167449951, |
|
"learning_rate": 7.047317924116877e-05, |
|
"loss": 1.1774, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"grad_norm": 1.0809965133666992, |
|
"learning_rate": 7.045137374618405e-05, |
|
"loss": 1.1658, |
|
"step": 13760 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"grad_norm": 1.0827128887176514, |
|
"learning_rate": 7.042956825119931e-05, |
|
"loss": 1.1639, |
|
"step": 13770 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"grad_norm": 1.112669825553894, |
|
"learning_rate": 7.040776275621457e-05, |
|
"loss": 1.1743, |
|
"step": 13780 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"grad_norm": 0.9779360890388489, |
|
"learning_rate": 7.038595726122984e-05, |
|
"loss": 1.1823, |
|
"step": 13790 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 1.0385786294937134, |
|
"learning_rate": 7.03641517662451e-05, |
|
"loss": 1.1804, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 1.05619215965271, |
|
"learning_rate": 7.034234627126036e-05, |
|
"loss": 1.1936, |
|
"step": 13810 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 1.28429114818573, |
|
"eval_runtime": 1501.2758, |
|
"eval_samples_per_second": 257.68, |
|
"eval_steps_per_second": 4.027, |
|
"step": 13818 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 46060, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 5.893571450073252e+18, |
|
"train_batch_size": 6, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|