{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 4877, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0002050440844781628, "grad_norm": 48.415470123291016, "learning_rate": 1.360544217687075e-07, "loss": 2.4442, "step": 1 }, { "epoch": 0.0004100881689563256, "grad_norm": 82.44342041015625, "learning_rate": 2.72108843537415e-07, "loss": 2.8507, "step": 2 }, { "epoch": 0.0006151322534344885, "grad_norm": 63.70850372314453, "learning_rate": 4.0816326530612243e-07, "loss": 2.6333, "step": 3 }, { "epoch": 0.0008201763379126512, "grad_norm": 64.90351104736328, "learning_rate": 5.4421768707483e-07, "loss": 2.6183, "step": 4 }, { "epoch": 0.001025220422390814, "grad_norm": 59.678951263427734, "learning_rate": 6.802721088435376e-07, "loss": 2.493, "step": 5 }, { "epoch": 0.001230264506868977, "grad_norm": 65.12047576904297, "learning_rate": 8.163265306122449e-07, "loss": 2.7567, "step": 6 }, { "epoch": 0.0014353085913471396, "grad_norm": 57.2817268371582, "learning_rate": 9.523809523809525e-07, "loss": 2.5794, "step": 7 }, { "epoch": 0.0016403526758253025, "grad_norm": 48.27614212036133, "learning_rate": 1.08843537414966e-06, "loss": 2.4565, "step": 8 }, { "epoch": 0.0018453967603034652, "grad_norm": 45.09442901611328, "learning_rate": 1.2244897959183673e-06, "loss": 2.5086, "step": 9 }, { "epoch": 0.002050440844781628, "grad_norm": 40.27000427246094, "learning_rate": 1.3605442176870751e-06, "loss": 2.1193, "step": 10 }, { "epoch": 0.002255484929259791, "grad_norm": 49.388851165771484, "learning_rate": 1.4965986394557825e-06, "loss": 2.3226, "step": 11 }, { "epoch": 0.002460529013737954, "grad_norm": 32.29252243041992, "learning_rate": 1.6326530612244897e-06, "loss": 2.0508, "step": 12 }, { "epoch": 0.0026655730982161163, "grad_norm": 27.946195602416992, "learning_rate": 1.7687074829931975e-06, "loss": 1.8215, "step": 13 }, { "epoch": 0.002870617182694279, "grad_norm": 41.25271224975586, "learning_rate": 1.904761904761905e-06, "loss": 1.9104, "step": 14 }, { "epoch": 0.003075661267172442, "grad_norm": 23.641109466552734, "learning_rate": 2.0408163265306125e-06, "loss": 1.7023, "step": 15 }, { "epoch": 0.003280705351650605, "grad_norm": 14.374492645263672, "learning_rate": 2.17687074829932e-06, "loss": 1.6494, "step": 16 }, { "epoch": 0.003485749436128768, "grad_norm": 7.344822883605957, "learning_rate": 2.3129251700680273e-06, "loss": 1.5615, "step": 17 }, { "epoch": 0.0036907935206069303, "grad_norm": 6.550902366638184, "learning_rate": 2.4489795918367347e-06, "loss": 1.3896, "step": 18 }, { "epoch": 0.0038958376050850932, "grad_norm": 6.488000869750977, "learning_rate": 2.5850340136054425e-06, "loss": 1.5318, "step": 19 }, { "epoch": 0.004100881689563256, "grad_norm": 5.305393218994141, "learning_rate": 2.7210884353741503e-06, "loss": 1.4865, "step": 20 }, { "epoch": 0.004305925774041419, "grad_norm": 5.509463310241699, "learning_rate": 2.8571428571428573e-06, "loss": 1.4294, "step": 21 }, { "epoch": 0.004510969858519582, "grad_norm": 5.078335285186768, "learning_rate": 2.993197278911565e-06, "loss": 1.5342, "step": 22 }, { "epoch": 0.004716013942997745, "grad_norm": 4.89779806137085, "learning_rate": 3.1292517006802725e-06, "loss": 1.4057, "step": 23 }, { "epoch": 0.004921058027475908, "grad_norm": 4.147183418273926, "learning_rate": 3.2653061224489794e-06, "loss": 1.3387, "step": 24 }, { "epoch": 0.00512610211195407, "grad_norm": 3.3408122062683105, "learning_rate": 3.4013605442176872e-06, "loss": 1.3886, "step": 25 }, { "epoch": 0.005331146196432233, "grad_norm": 2.7601656913757324, "learning_rate": 3.537414965986395e-06, "loss": 1.2809, "step": 26 }, { "epoch": 0.0055361902809103955, "grad_norm": 2.9200809001922607, "learning_rate": 3.6734693877551024e-06, "loss": 1.3573, "step": 27 }, { "epoch": 0.005741234365388558, "grad_norm": 3.089107036590576, "learning_rate": 3.80952380952381e-06, "loss": 1.3258, "step": 28 }, { "epoch": 0.005946278449866721, "grad_norm": 2.623694658279419, "learning_rate": 3.945578231292517e-06, "loss": 1.3288, "step": 29 }, { "epoch": 0.006151322534344884, "grad_norm": 2.7469542026519775, "learning_rate": 4.081632653061225e-06, "loss": 1.3678, "step": 30 }, { "epoch": 0.006356366618823047, "grad_norm": 2.8049144744873047, "learning_rate": 4.217687074829933e-06, "loss": 1.4187, "step": 31 }, { "epoch": 0.00656141070330121, "grad_norm": 2.364210367202759, "learning_rate": 4.35374149659864e-06, "loss": 1.2465, "step": 32 }, { "epoch": 0.006766454787779373, "grad_norm": 2.197099208831787, "learning_rate": 4.489795918367348e-06, "loss": 1.2985, "step": 33 }, { "epoch": 0.006971498872257536, "grad_norm": 2.147989273071289, "learning_rate": 4.6258503401360546e-06, "loss": 1.2626, "step": 34 }, { "epoch": 0.007176542956735698, "grad_norm": 2.1869521141052246, "learning_rate": 4.761904761904762e-06, "loss": 1.226, "step": 35 }, { "epoch": 0.007381587041213861, "grad_norm": 2.049344778060913, "learning_rate": 4.897959183673469e-06, "loss": 1.2626, "step": 36 }, { "epoch": 0.0075866311256920236, "grad_norm": 2.0684967041015625, "learning_rate": 5.034013605442177e-06, "loss": 1.2427, "step": 37 }, { "epoch": 0.0077916752101701865, "grad_norm": 2.0120723247528076, "learning_rate": 5.170068027210885e-06, "loss": 1.278, "step": 38 }, { "epoch": 0.00799671929464835, "grad_norm": 1.8640087842941284, "learning_rate": 5.306122448979593e-06, "loss": 1.2353, "step": 39 }, { "epoch": 0.008201763379126512, "grad_norm": 1.8767114877700806, "learning_rate": 5.442176870748301e-06, "loss": 1.172, "step": 40 }, { "epoch": 0.008406807463604674, "grad_norm": 2.25344181060791, "learning_rate": 5.578231292517007e-06, "loss": 1.2056, "step": 41 }, { "epoch": 0.008611851548082838, "grad_norm": 2.2110986709594727, "learning_rate": 5.7142857142857145e-06, "loss": 1.2738, "step": 42 }, { "epoch": 0.008816895632561, "grad_norm": 2.073709726333618, "learning_rate": 5.850340136054422e-06, "loss": 1.2056, "step": 43 }, { "epoch": 0.009021939717039164, "grad_norm": 1.751121997833252, "learning_rate": 5.98639455782313e-06, "loss": 1.2356, "step": 44 }, { "epoch": 0.009226983801517326, "grad_norm": 1.845166563987732, "learning_rate": 6.122448979591837e-06, "loss": 1.1333, "step": 45 }, { "epoch": 0.00943202788599549, "grad_norm": 1.8606078624725342, "learning_rate": 6.258503401360545e-06, "loss": 1.2204, "step": 46 }, { "epoch": 0.009637071970473652, "grad_norm": 1.9109306335449219, "learning_rate": 6.394557823129253e-06, "loss": 1.2721, "step": 47 }, { "epoch": 0.009842116054951815, "grad_norm": 1.7726638317108154, "learning_rate": 6.530612244897959e-06, "loss": 1.2634, "step": 48 }, { "epoch": 0.010047160139429977, "grad_norm": 1.898488163948059, "learning_rate": 6.666666666666667e-06, "loss": 1.1808, "step": 49 }, { "epoch": 0.01025220422390814, "grad_norm": 1.971341609954834, "learning_rate": 6.8027210884353745e-06, "loss": 1.1542, "step": 50 }, { "epoch": 0.010457248308386303, "grad_norm": 1.9234049320220947, "learning_rate": 6.938775510204082e-06, "loss": 1.2548, "step": 51 }, { "epoch": 0.010662292392864465, "grad_norm": 1.9180388450622559, "learning_rate": 7.07482993197279e-06, "loss": 1.2301, "step": 52 }, { "epoch": 0.010867336477342629, "grad_norm": 1.9492604732513428, "learning_rate": 7.210884353741497e-06, "loss": 1.2188, "step": 53 }, { "epoch": 0.011072380561820791, "grad_norm": 1.863508701324463, "learning_rate": 7.346938775510205e-06, "loss": 1.2092, "step": 54 }, { "epoch": 0.011277424646298955, "grad_norm": 1.743523359298706, "learning_rate": 7.482993197278913e-06, "loss": 1.171, "step": 55 }, { "epoch": 0.011482468730777117, "grad_norm": 1.8969303369522095, "learning_rate": 7.61904761904762e-06, "loss": 1.0957, "step": 56 }, { "epoch": 0.01168751281525528, "grad_norm": 1.7344415187835693, "learning_rate": 7.755102040816327e-06, "loss": 1.1056, "step": 57 }, { "epoch": 0.011892556899733443, "grad_norm": 1.7017936706542969, "learning_rate": 7.891156462585034e-06, "loss": 1.2288, "step": 58 }, { "epoch": 0.012097600984211606, "grad_norm": 1.7878144979476929, "learning_rate": 8.027210884353741e-06, "loss": 1.2263, "step": 59 }, { "epoch": 0.012302645068689768, "grad_norm": 1.9525341987609863, "learning_rate": 8.16326530612245e-06, "loss": 1.1847, "step": 60 }, { "epoch": 0.01250768915316793, "grad_norm": 1.894495964050293, "learning_rate": 8.299319727891157e-06, "loss": 1.0376, "step": 61 }, { "epoch": 0.012712733237646094, "grad_norm": 1.6344630718231201, "learning_rate": 8.435374149659866e-06, "loss": 1.0759, "step": 62 }, { "epoch": 0.012917777322124256, "grad_norm": 1.7110151052474976, "learning_rate": 8.571428571428571e-06, "loss": 1.1374, "step": 63 }, { "epoch": 0.01312282140660242, "grad_norm": 1.765801191329956, "learning_rate": 8.70748299319728e-06, "loss": 1.1362, "step": 64 }, { "epoch": 0.013327865491080582, "grad_norm": 1.9901983737945557, "learning_rate": 8.843537414965987e-06, "loss": 1.2065, "step": 65 }, { "epoch": 0.013532909575558746, "grad_norm": 1.5402772426605225, "learning_rate": 8.979591836734695e-06, "loss": 1.0388, "step": 66 }, { "epoch": 0.013737953660036908, "grad_norm": 1.727859377861023, "learning_rate": 9.115646258503402e-06, "loss": 1.1666, "step": 67 }, { "epoch": 0.013942997744515071, "grad_norm": 1.74527108669281, "learning_rate": 9.251700680272109e-06, "loss": 1.1776, "step": 68 }, { "epoch": 0.014148041828993234, "grad_norm": 1.893489956855774, "learning_rate": 9.387755102040818e-06, "loss": 1.1567, "step": 69 }, { "epoch": 0.014353085913471396, "grad_norm": 1.831405520439148, "learning_rate": 9.523809523809525e-06, "loss": 1.1296, "step": 70 }, { "epoch": 0.01455812999794956, "grad_norm": 1.7720377445220947, "learning_rate": 9.659863945578232e-06, "loss": 1.1385, "step": 71 }, { "epoch": 0.014763174082427721, "grad_norm": 1.7247951030731201, "learning_rate": 9.795918367346939e-06, "loss": 1.123, "step": 72 }, { "epoch": 0.014968218166905885, "grad_norm": 1.8244285583496094, "learning_rate": 9.931972789115647e-06, "loss": 1.119, "step": 73 }, { "epoch": 0.015173262251384047, "grad_norm": 1.9011858701705933, "learning_rate": 1.0068027210884354e-05, "loss": 1.1577, "step": 74 }, { "epoch": 0.015378306335862211, "grad_norm": 1.8621598482131958, "learning_rate": 1.0204081632653063e-05, "loss": 1.1467, "step": 75 }, { "epoch": 0.015583350420340373, "grad_norm": 1.8378175497055054, "learning_rate": 1.034013605442177e-05, "loss": 1.1282, "step": 76 }, { "epoch": 0.015788394504818535, "grad_norm": 1.8820751905441284, "learning_rate": 1.0476190476190477e-05, "loss": 1.1112, "step": 77 }, { "epoch": 0.0159934385892967, "grad_norm": 2.001817226409912, "learning_rate": 1.0612244897959186e-05, "loss": 0.9652, "step": 78 }, { "epoch": 0.016198482673774862, "grad_norm": 1.90394926071167, "learning_rate": 1.0748299319727893e-05, "loss": 1.1321, "step": 79 }, { "epoch": 0.016403526758253024, "grad_norm": 1.8320279121398926, "learning_rate": 1.0884353741496601e-05, "loss": 1.1255, "step": 80 }, { "epoch": 0.016608570842731187, "grad_norm": 1.759293556213379, "learning_rate": 1.1020408163265306e-05, "loss": 1.1096, "step": 81 }, { "epoch": 0.01681361492720935, "grad_norm": 1.7571510076522827, "learning_rate": 1.1156462585034013e-05, "loss": 1.1491, "step": 82 }, { "epoch": 0.017018659011687514, "grad_norm": 1.962896466255188, "learning_rate": 1.1292517006802722e-05, "loss": 1.0692, "step": 83 }, { "epoch": 0.017223703096165676, "grad_norm": 1.6759494543075562, "learning_rate": 1.1428571428571429e-05, "loss": 1.0861, "step": 84 }, { "epoch": 0.017428747180643838, "grad_norm": 1.855803370475769, "learning_rate": 1.1564625850340136e-05, "loss": 1.1406, "step": 85 }, { "epoch": 0.017633791265122, "grad_norm": 1.7745225429534912, "learning_rate": 1.1700680272108845e-05, "loss": 1.1728, "step": 86 }, { "epoch": 0.017838835349600166, "grad_norm": 1.8114947080612183, "learning_rate": 1.1836734693877552e-05, "loss": 1.0919, "step": 87 }, { "epoch": 0.018043879434078328, "grad_norm": 1.9306602478027344, "learning_rate": 1.197278911564626e-05, "loss": 1.1022, "step": 88 }, { "epoch": 0.01824892351855649, "grad_norm": 1.8473849296569824, "learning_rate": 1.2108843537414967e-05, "loss": 1.1027, "step": 89 }, { "epoch": 0.01845396760303465, "grad_norm": 1.8535244464874268, "learning_rate": 1.2244897959183674e-05, "loss": 1.089, "step": 90 }, { "epoch": 0.018659011687512814, "grad_norm": 1.74315345287323, "learning_rate": 1.2380952380952383e-05, "loss": 1.1733, "step": 91 }, { "epoch": 0.01886405577199098, "grad_norm": 1.7871073484420776, "learning_rate": 1.251700680272109e-05, "loss": 1.139, "step": 92 }, { "epoch": 0.01906909985646914, "grad_norm": 2.0404415130615234, "learning_rate": 1.2653061224489798e-05, "loss": 1.1903, "step": 93 }, { "epoch": 0.019274143940947303, "grad_norm": 2.0559206008911133, "learning_rate": 1.2789115646258505e-05, "loss": 1.0976, "step": 94 }, { "epoch": 0.019479188025425465, "grad_norm": 1.9368796348571777, "learning_rate": 1.2925170068027212e-05, "loss": 1.0686, "step": 95 }, { "epoch": 0.01968423210990363, "grad_norm": 1.9620164632797241, "learning_rate": 1.3061224489795918e-05, "loss": 1.1676, "step": 96 }, { "epoch": 0.019889276194381793, "grad_norm": 2.0121655464172363, "learning_rate": 1.3197278911564626e-05, "loss": 1.1754, "step": 97 }, { "epoch": 0.020094320278859955, "grad_norm": 1.6618162393569946, "learning_rate": 1.3333333333333333e-05, "loss": 1.0602, "step": 98 }, { "epoch": 0.020299364363338117, "grad_norm": 1.742382287979126, "learning_rate": 1.3469387755102042e-05, "loss": 1.0908, "step": 99 }, { "epoch": 0.02050440844781628, "grad_norm": 1.8762692213058472, "learning_rate": 1.3605442176870749e-05, "loss": 1.0632, "step": 100 }, { "epoch": 0.020709452532294444, "grad_norm": 1.8384193181991577, "learning_rate": 1.3741496598639456e-05, "loss": 1.1266, "step": 101 }, { "epoch": 0.020914496616772606, "grad_norm": 1.6157550811767578, "learning_rate": 1.3877551020408165e-05, "loss": 1.0654, "step": 102 }, { "epoch": 0.02111954070125077, "grad_norm": 1.7707439661026, "learning_rate": 1.4013605442176872e-05, "loss": 1.0817, "step": 103 }, { "epoch": 0.02132458478572893, "grad_norm": 1.7485060691833496, "learning_rate": 1.414965986394558e-05, "loss": 1.0533, "step": 104 }, { "epoch": 0.021529628870207096, "grad_norm": 1.8275001049041748, "learning_rate": 1.4285714285714287e-05, "loss": 1.1579, "step": 105 }, { "epoch": 0.021734672954685258, "grad_norm": 1.7671985626220703, "learning_rate": 1.4421768707482994e-05, "loss": 1.1579, "step": 106 }, { "epoch": 0.02193971703916342, "grad_norm": 1.6609301567077637, "learning_rate": 1.4557823129251703e-05, "loss": 1.1529, "step": 107 }, { "epoch": 0.022144761123641582, "grad_norm": 1.6967482566833496, "learning_rate": 1.469387755102041e-05, "loss": 1.063, "step": 108 }, { "epoch": 0.022349805208119744, "grad_norm": 1.6827672719955444, "learning_rate": 1.4829931972789118e-05, "loss": 1.0811, "step": 109 }, { "epoch": 0.02255484929259791, "grad_norm": 1.6297671794891357, "learning_rate": 1.4965986394557825e-05, "loss": 1.192, "step": 110 }, { "epoch": 0.02275989337707607, "grad_norm": 1.6657370328903198, "learning_rate": 1.510204081632653e-05, "loss": 1.1636, "step": 111 }, { "epoch": 0.022964937461554234, "grad_norm": 1.710831642150879, "learning_rate": 1.523809523809524e-05, "loss": 1.0936, "step": 112 }, { "epoch": 0.023169981546032396, "grad_norm": 1.7785454988479614, "learning_rate": 1.5374149659863945e-05, "loss": 1.0838, "step": 113 }, { "epoch": 0.02337502563051056, "grad_norm": 1.7085858583450317, "learning_rate": 1.5510204081632655e-05, "loss": 1.091, "step": 114 }, { "epoch": 0.023580069714988723, "grad_norm": 1.7374515533447266, "learning_rate": 1.5646258503401362e-05, "loss": 1.1155, "step": 115 }, { "epoch": 0.023785113799466885, "grad_norm": 1.6962194442749023, "learning_rate": 1.578231292517007e-05, "loss": 1.0378, "step": 116 }, { "epoch": 0.023990157883945047, "grad_norm": 1.644059181213379, "learning_rate": 1.5918367346938776e-05, "loss": 1.0976, "step": 117 }, { "epoch": 0.024195201968423213, "grad_norm": 1.911876916885376, "learning_rate": 1.6054421768707483e-05, "loss": 1.123, "step": 118 }, { "epoch": 0.024400246052901375, "grad_norm": 1.5711785554885864, "learning_rate": 1.6190476190476193e-05, "loss": 1.1399, "step": 119 }, { "epoch": 0.024605290137379537, "grad_norm": 1.7551281452178955, "learning_rate": 1.63265306122449e-05, "loss": 1.1011, "step": 120 }, { "epoch": 0.0248103342218577, "grad_norm": 1.875630259513855, "learning_rate": 1.6462585034013607e-05, "loss": 1.0708, "step": 121 }, { "epoch": 0.02501537830633586, "grad_norm": 1.688490390777588, "learning_rate": 1.6598639455782314e-05, "loss": 1.1218, "step": 122 }, { "epoch": 0.025220422390814026, "grad_norm": 1.7593094110488892, "learning_rate": 1.673469387755102e-05, "loss": 1.1049, "step": 123 }, { "epoch": 0.02542546647529219, "grad_norm": 1.6504801511764526, "learning_rate": 1.687074829931973e-05, "loss": 1.1468, "step": 124 }, { "epoch": 0.02563051055977035, "grad_norm": 1.7844185829162598, "learning_rate": 1.7006802721088435e-05, "loss": 1.1071, "step": 125 }, { "epoch": 0.025835554644248512, "grad_norm": 1.6569106578826904, "learning_rate": 1.7142857142857142e-05, "loss": 1.0959, "step": 126 }, { "epoch": 0.026040598728726678, "grad_norm": 1.6201845407485962, "learning_rate": 1.7278911564625852e-05, "loss": 1.0708, "step": 127 }, { "epoch": 0.02624564281320484, "grad_norm": 1.5998262166976929, "learning_rate": 1.741496598639456e-05, "loss": 1.0959, "step": 128 }, { "epoch": 0.026450686897683002, "grad_norm": 1.585235834121704, "learning_rate": 1.7551020408163266e-05, "loss": 1.0296, "step": 129 }, { "epoch": 0.026655730982161164, "grad_norm": 1.5520007610321045, "learning_rate": 1.7687074829931973e-05, "loss": 1.0882, "step": 130 }, { "epoch": 0.026860775066639326, "grad_norm": 1.6649219989776611, "learning_rate": 1.782312925170068e-05, "loss": 1.1215, "step": 131 }, { "epoch": 0.02706581915111749, "grad_norm": 1.5785614252090454, "learning_rate": 1.795918367346939e-05, "loss": 0.9944, "step": 132 }, { "epoch": 0.027270863235595653, "grad_norm": 1.7236407995224, "learning_rate": 1.8095238095238097e-05, "loss": 1.0416, "step": 133 }, { "epoch": 0.027475907320073815, "grad_norm": 1.7926677465438843, "learning_rate": 1.8231292517006804e-05, "loss": 1.0975, "step": 134 }, { "epoch": 0.027680951404551978, "grad_norm": 1.8240793943405151, "learning_rate": 1.836734693877551e-05, "loss": 1.0996, "step": 135 }, { "epoch": 0.027885995489030143, "grad_norm": 1.7598989009857178, "learning_rate": 1.8503401360544218e-05, "loss": 1.1275, "step": 136 }, { "epoch": 0.028091039573508305, "grad_norm": 1.7654674053192139, "learning_rate": 1.863945578231293e-05, "loss": 1.0955, "step": 137 }, { "epoch": 0.028296083657986467, "grad_norm": 1.732854962348938, "learning_rate": 1.8775510204081636e-05, "loss": 1.0997, "step": 138 }, { "epoch": 0.02850112774246463, "grad_norm": 1.7016481161117554, "learning_rate": 1.8911564625850343e-05, "loss": 1.0969, "step": 139 }, { "epoch": 0.02870617182694279, "grad_norm": 1.6110559701919556, "learning_rate": 1.904761904761905e-05, "loss": 1.0639, "step": 140 }, { "epoch": 0.028911215911420957, "grad_norm": 1.5945171117782593, "learning_rate": 1.9183673469387756e-05, "loss": 1.1012, "step": 141 }, { "epoch": 0.02911625999589912, "grad_norm": 1.6395087242126465, "learning_rate": 1.9319727891156463e-05, "loss": 1.0938, "step": 142 }, { "epoch": 0.02932130408037728, "grad_norm": 1.6692111492156982, "learning_rate": 1.945578231292517e-05, "loss": 1.1397, "step": 143 }, { "epoch": 0.029526348164855443, "grad_norm": 1.7996747493743896, "learning_rate": 1.9591836734693877e-05, "loss": 1.0741, "step": 144 }, { "epoch": 0.029731392249333608, "grad_norm": 1.6380680799484253, "learning_rate": 1.9727891156462588e-05, "loss": 1.0782, "step": 145 }, { "epoch": 0.02993643633381177, "grad_norm": 1.6311140060424805, "learning_rate": 1.9863945578231295e-05, "loss": 1.1232, "step": 146 }, { "epoch": 0.030141480418289932, "grad_norm": 1.5871338844299316, "learning_rate": 2e-05, "loss": 1.0914, "step": 147 }, { "epoch": 0.030346524502768094, "grad_norm": 1.83720064163208, "learning_rate": 1.9999997794294877e-05, "loss": 1.1434, "step": 148 }, { "epoch": 0.030551568587246256, "grad_norm": 1.9153143167495728, "learning_rate": 1.9999991177180472e-05, "loss": 1.1069, "step": 149 }, { "epoch": 0.030756612671724422, "grad_norm": 1.7401002645492554, "learning_rate": 1.9999980148659714e-05, "loss": 1.0172, "step": 150 }, { "epoch": 0.030961656756202584, "grad_norm": 1.6493116617202759, "learning_rate": 1.999996470873746e-05, "loss": 1.0599, "step": 151 }, { "epoch": 0.031166700840680746, "grad_norm": 1.7222634553909302, "learning_rate": 1.9999944857420527e-05, "loss": 1.1752, "step": 152 }, { "epoch": 0.03137174492515891, "grad_norm": 1.809936761856079, "learning_rate": 1.999992059471767e-05, "loss": 0.9979, "step": 153 }, { "epoch": 0.03157678900963707, "grad_norm": 1.7012087106704712, "learning_rate": 1.999989192063959e-05, "loss": 1.087, "step": 154 }, { "epoch": 0.03178183309411523, "grad_norm": 1.6264607906341553, "learning_rate": 1.9999858835198938e-05, "loss": 1.1399, "step": 155 }, { "epoch": 0.0319868771785934, "grad_norm": 1.6558951139450073, "learning_rate": 1.999982133841031e-05, "loss": 1.0469, "step": 156 }, { "epoch": 0.03219192126307156, "grad_norm": 1.563999891281128, "learning_rate": 1.9999779430290247e-05, "loss": 1.0621, "step": 157 }, { "epoch": 0.032396965347549725, "grad_norm": 1.6692713499069214, "learning_rate": 1.9999733110857237e-05, "loss": 1.0715, "step": 158 }, { "epoch": 0.03260200943202789, "grad_norm": 1.8647233247756958, "learning_rate": 1.9999682380131712e-05, "loss": 0.9877, "step": 159 }, { "epoch": 0.03280705351650605, "grad_norm": 1.7016457319259644, "learning_rate": 1.9999627238136054e-05, "loss": 1.1007, "step": 160 }, { "epoch": 0.03301209760098421, "grad_norm": 1.683165431022644, "learning_rate": 1.9999567684894582e-05, "loss": 0.9797, "step": 161 }, { "epoch": 0.03321714168546237, "grad_norm": 1.5590035915374756, "learning_rate": 1.9999503720433575e-05, "loss": 1.0875, "step": 162 }, { "epoch": 0.033422185769940535, "grad_norm": 1.8474650382995605, "learning_rate": 1.9999435344781247e-05, "loss": 1.0526, "step": 163 }, { "epoch": 0.0336272298544187, "grad_norm": 1.781255841255188, "learning_rate": 1.999936255796776e-05, "loss": 1.1819, "step": 164 }, { "epoch": 0.033832273938896866, "grad_norm": 1.666731595993042, "learning_rate": 1.9999285360025228e-05, "loss": 1.0596, "step": 165 }, { "epoch": 0.03403731802337503, "grad_norm": 1.771173357963562, "learning_rate": 1.99992037509877e-05, "loss": 1.0859, "step": 166 }, { "epoch": 0.03424236210785319, "grad_norm": 1.6486271619796753, "learning_rate": 1.999911773089118e-05, "loss": 1.1092, "step": 167 }, { "epoch": 0.03444740619233135, "grad_norm": 1.703294038772583, "learning_rate": 1.999902729977362e-05, "loss": 1.0238, "step": 168 }, { "epoch": 0.034652450276809514, "grad_norm": 1.638447880744934, "learning_rate": 1.9998932457674904e-05, "loss": 1.0897, "step": 169 }, { "epoch": 0.034857494361287676, "grad_norm": 1.5265074968338013, "learning_rate": 1.9998833204636874e-05, "loss": 1.0804, "step": 170 }, { "epoch": 0.03506253844576584, "grad_norm": 1.6938060522079468, "learning_rate": 1.9998729540703317e-05, "loss": 1.0131, "step": 171 }, { "epoch": 0.035267582530244, "grad_norm": 1.6699985265731812, "learning_rate": 1.999862146591996e-05, "loss": 1.0801, "step": 172 }, { "epoch": 0.03547262661472216, "grad_norm": 1.5471646785736084, "learning_rate": 1.999850898033448e-05, "loss": 1.0248, "step": 173 }, { "epoch": 0.03567767069920033, "grad_norm": 1.8196700811386108, "learning_rate": 1.9998392083996503e-05, "loss": 1.0806, "step": 174 }, { "epoch": 0.03588271478367849, "grad_norm": 1.7067763805389404, "learning_rate": 1.9998270776957592e-05, "loss": 1.0838, "step": 175 }, { "epoch": 0.036087758868156655, "grad_norm": 1.6548247337341309, "learning_rate": 1.9998145059271263e-05, "loss": 1.0853, "step": 176 }, { "epoch": 0.03629280295263482, "grad_norm": 1.6514452695846558, "learning_rate": 1.9998014930992976e-05, "loss": 1.1034, "step": 177 }, { "epoch": 0.03649784703711298, "grad_norm": 1.567408800125122, "learning_rate": 1.999788039218013e-05, "loss": 1.0346, "step": 178 }, { "epoch": 0.03670289112159114, "grad_norm": 1.5189143419265747, "learning_rate": 1.9997741442892083e-05, "loss": 1.0723, "step": 179 }, { "epoch": 0.0369079352060693, "grad_norm": 1.6458110809326172, "learning_rate": 1.999759808319013e-05, "loss": 1.0571, "step": 180 }, { "epoch": 0.037112979290547465, "grad_norm": 1.6987104415893555, "learning_rate": 1.999745031313751e-05, "loss": 1.1188, "step": 181 }, { "epoch": 0.03731802337502563, "grad_norm": 1.6914490461349487, "learning_rate": 1.9997298132799408e-05, "loss": 1.0963, "step": 182 }, { "epoch": 0.037523067459503796, "grad_norm": 1.5727925300598145, "learning_rate": 1.9997141542242962e-05, "loss": 1.0303, "step": 183 }, { "epoch": 0.03772811154398196, "grad_norm": 1.787243366241455, "learning_rate": 1.999698054153725e-05, "loss": 1.1426, "step": 184 }, { "epoch": 0.03793315562846012, "grad_norm": 1.8316630125045776, "learning_rate": 1.9996815130753297e-05, "loss": 1.0037, "step": 185 }, { "epoch": 0.03813819971293828, "grad_norm": 1.6016260385513306, "learning_rate": 1.9996645309964068e-05, "loss": 1.1639, "step": 186 }, { "epoch": 0.038343243797416444, "grad_norm": 1.7771539688110352, "learning_rate": 1.9996471079244477e-05, "loss": 1.1377, "step": 187 }, { "epoch": 0.038548287881894606, "grad_norm": 1.5600498914718628, "learning_rate": 1.9996292438671392e-05, "loss": 1.0426, "step": 188 }, { "epoch": 0.03875333196637277, "grad_norm": 1.630336046218872, "learning_rate": 1.9996109388323614e-05, "loss": 0.9773, "step": 189 }, { "epoch": 0.03895837605085093, "grad_norm": 1.4694344997406006, "learning_rate": 1.9995921928281893e-05, "loss": 1.1075, "step": 190 }, { "epoch": 0.03916342013532909, "grad_norm": 1.629884958267212, "learning_rate": 1.9995730058628928e-05, "loss": 1.0524, "step": 191 }, { "epoch": 0.03936846421980726, "grad_norm": 1.567692518234253, "learning_rate": 1.999553377944936e-05, "loss": 1.0096, "step": 192 }, { "epoch": 0.039573508304285424, "grad_norm": 1.8296796083450317, "learning_rate": 1.9995333090829774e-05, "loss": 1.0094, "step": 193 }, { "epoch": 0.039778552388763586, "grad_norm": 1.6546266078948975, "learning_rate": 1.9995127992858703e-05, "loss": 1.0182, "step": 194 }, { "epoch": 0.03998359647324175, "grad_norm": 1.8073774576187134, "learning_rate": 1.9994918485626625e-05, "loss": 1.0904, "step": 195 }, { "epoch": 0.04018864055771991, "grad_norm": 1.6416845321655273, "learning_rate": 1.999470456922596e-05, "loss": 1.0907, "step": 196 }, { "epoch": 0.04039368464219807, "grad_norm": 1.6455363035202026, "learning_rate": 1.9994486243751076e-05, "loss": 1.0197, "step": 197 }, { "epoch": 0.040598728726676234, "grad_norm": 1.6055454015731812, "learning_rate": 1.999426350929829e-05, "loss": 1.0844, "step": 198 }, { "epoch": 0.040803772811154396, "grad_norm": 1.6162798404693604, "learning_rate": 1.999403636596585e-05, "loss": 1.0616, "step": 199 }, { "epoch": 0.04100881689563256, "grad_norm": 1.5896493196487427, "learning_rate": 1.9993804813853968e-05, "loss": 1.059, "step": 200 }, { "epoch": 0.04121386098011073, "grad_norm": 1.6926223039627075, "learning_rate": 1.9993568853064788e-05, "loss": 1.0575, "step": 201 }, { "epoch": 0.04141890506458889, "grad_norm": 1.4698514938354492, "learning_rate": 1.9993328483702393e-05, "loss": 1.0819, "step": 202 }, { "epoch": 0.04162394914906705, "grad_norm": 1.5445387363433838, "learning_rate": 1.9993083705872833e-05, "loss": 1.0544, "step": 203 }, { "epoch": 0.04182899323354521, "grad_norm": 1.6728525161743164, "learning_rate": 1.9992834519684084e-05, "loss": 1.1156, "step": 204 }, { "epoch": 0.042034037318023375, "grad_norm": 1.5741982460021973, "learning_rate": 1.9992580925246073e-05, "loss": 1.073, "step": 205 }, { "epoch": 0.04223908140250154, "grad_norm": 1.5854299068450928, "learning_rate": 1.9992322922670667e-05, "loss": 0.9357, "step": 206 }, { "epoch": 0.0424441254869797, "grad_norm": 1.649208426475525, "learning_rate": 1.999206051207169e-05, "loss": 1.0217, "step": 207 }, { "epoch": 0.04264916957145786, "grad_norm": 1.615433931350708, "learning_rate": 1.9991793693564894e-05, "loss": 1.0801, "step": 208 }, { "epoch": 0.04285421365593602, "grad_norm": 1.7857059240341187, "learning_rate": 1.9991522467267985e-05, "loss": 1.0685, "step": 209 }, { "epoch": 0.04305925774041419, "grad_norm": 1.6191749572753906, "learning_rate": 1.9991246833300614e-05, "loss": 1.0225, "step": 210 }, { "epoch": 0.043264301824892354, "grad_norm": 1.5881736278533936, "learning_rate": 1.9990966791784375e-05, "loss": 1.1099, "step": 211 }, { "epoch": 0.043469345909370516, "grad_norm": 1.7025728225708008, "learning_rate": 1.9990682342842805e-05, "loss": 1.0271, "step": 212 }, { "epoch": 0.04367438999384868, "grad_norm": 1.6976343393325806, "learning_rate": 1.9990393486601385e-05, "loss": 1.0811, "step": 213 }, { "epoch": 0.04387943407832684, "grad_norm": 1.5459390878677368, "learning_rate": 1.9990100223187544e-05, "loss": 1.0155, "step": 214 }, { "epoch": 0.044084478162805, "grad_norm": 1.5807273387908936, "learning_rate": 1.9989802552730647e-05, "loss": 1.0074, "step": 215 }, { "epoch": 0.044289522247283164, "grad_norm": 1.673044204711914, "learning_rate": 1.9989500475362014e-05, "loss": 1.1257, "step": 216 }, { "epoch": 0.044494566331761326, "grad_norm": 1.8361009359359741, "learning_rate": 1.99891939912149e-05, "loss": 1.0508, "step": 217 }, { "epoch": 0.04469961041623949, "grad_norm": 1.6125807762145996, "learning_rate": 1.9988883100424515e-05, "loss": 1.0434, "step": 218 }, { "epoch": 0.04490465450071766, "grad_norm": 1.5914429426193237, "learning_rate": 1.9988567803127997e-05, "loss": 1.039, "step": 219 }, { "epoch": 0.04510969858519582, "grad_norm": 1.4886208772659302, "learning_rate": 1.9988248099464437e-05, "loss": 1.0172, "step": 220 }, { "epoch": 0.04531474266967398, "grad_norm": 1.5641365051269531, "learning_rate": 1.998792398957488e-05, "loss": 1.0833, "step": 221 }, { "epoch": 0.04551978675415214, "grad_norm": 1.598436713218689, "learning_rate": 1.9987595473602292e-05, "loss": 1.0342, "step": 222 }, { "epoch": 0.045724830838630305, "grad_norm": 1.6428006887435913, "learning_rate": 1.9987262551691597e-05, "loss": 1.148, "step": 223 }, { "epoch": 0.04592987492310847, "grad_norm": 1.5412030220031738, "learning_rate": 1.9986925223989665e-05, "loss": 1.0831, "step": 224 }, { "epoch": 0.04613491900758663, "grad_norm": 1.5360864400863647, "learning_rate": 1.9986583490645305e-05, "loss": 1.0796, "step": 225 }, { "epoch": 0.04633996309206479, "grad_norm": 1.5773122310638428, "learning_rate": 1.998623735180927e-05, "loss": 0.97, "step": 226 }, { "epoch": 0.04654500717654296, "grad_norm": 1.661786437034607, "learning_rate": 1.9985886807634246e-05, "loss": 1.0618, "step": 227 }, { "epoch": 0.04675005126102112, "grad_norm": 1.7075649499893188, "learning_rate": 1.9985531858274886e-05, "loss": 1.0343, "step": 228 }, { "epoch": 0.046955095345499284, "grad_norm": 1.4621360301971436, "learning_rate": 1.9985172503887767e-05, "loss": 1.0946, "step": 229 }, { "epoch": 0.047160139429977446, "grad_norm": 1.6130188703536987, "learning_rate": 1.998480874463141e-05, "loss": 1.0084, "step": 230 }, { "epoch": 0.04736518351445561, "grad_norm": 1.6544601917266846, "learning_rate": 1.9984440580666297e-05, "loss": 1.0503, "step": 231 }, { "epoch": 0.04757022759893377, "grad_norm": 1.474686861038208, "learning_rate": 1.9984068012154824e-05, "loss": 1.0243, "step": 232 }, { "epoch": 0.04777527168341193, "grad_norm": 1.5949373245239258, "learning_rate": 1.9983691039261358e-05, "loss": 1.0409, "step": 233 }, { "epoch": 0.047980315767890094, "grad_norm": 1.6834007501602173, "learning_rate": 1.9983309662152194e-05, "loss": 0.9684, "step": 234 }, { "epoch": 0.048185359852368256, "grad_norm": 1.662947177886963, "learning_rate": 1.998292388099557e-05, "loss": 1.1173, "step": 235 }, { "epoch": 0.048390403936846425, "grad_norm": 1.665867805480957, "learning_rate": 1.9982533695961678e-05, "loss": 1.1222, "step": 236 }, { "epoch": 0.04859544802132459, "grad_norm": 1.5966871976852417, "learning_rate": 1.9982139107222634e-05, "loss": 1.026, "step": 237 }, { "epoch": 0.04880049210580275, "grad_norm": 1.6512725353240967, "learning_rate": 1.9981740114952513e-05, "loss": 1.0884, "step": 238 }, { "epoch": 0.04900553619028091, "grad_norm": 1.7269357442855835, "learning_rate": 1.9981336719327328e-05, "loss": 1.0572, "step": 239 }, { "epoch": 0.04921058027475907, "grad_norm": 1.5811330080032349, "learning_rate": 1.9980928920525033e-05, "loss": 1.0955, "step": 240 }, { "epoch": 0.049415624359237235, "grad_norm": 1.6099506616592407, "learning_rate": 1.998051671872552e-05, "loss": 0.9664, "step": 241 }, { "epoch": 0.0496206684437154, "grad_norm": 1.617640495300293, "learning_rate": 1.9980100114110637e-05, "loss": 0.9756, "step": 242 }, { "epoch": 0.04982571252819356, "grad_norm": 1.624562382698059, "learning_rate": 1.9979679106864157e-05, "loss": 1.0831, "step": 243 }, { "epoch": 0.05003075661267172, "grad_norm": 1.758339524269104, "learning_rate": 1.9979253697171804e-05, "loss": 1.0408, "step": 244 }, { "epoch": 0.05023580069714989, "grad_norm": 1.601393222808838, "learning_rate": 1.997882388522125e-05, "loss": 0.9473, "step": 245 }, { "epoch": 0.05044084478162805, "grad_norm": 1.5609827041625977, "learning_rate": 1.9978389671202096e-05, "loss": 1.0629, "step": 246 }, { "epoch": 0.050645888866106215, "grad_norm": 1.7045135498046875, "learning_rate": 1.99779510553059e-05, "loss": 1.1522, "step": 247 }, { "epoch": 0.05085093295058438, "grad_norm": 1.5171713829040527, "learning_rate": 1.9977508037726144e-05, "loss": 1.0338, "step": 248 }, { "epoch": 0.05105597703506254, "grad_norm": 1.695656657218933, "learning_rate": 1.9977060618658265e-05, "loss": 1.0579, "step": 249 }, { "epoch": 0.0512610211195407, "grad_norm": 1.5111212730407715, "learning_rate": 1.9976608798299638e-05, "loss": 1.0601, "step": 250 }, { "epoch": 0.05146606520401886, "grad_norm": 1.5330798625946045, "learning_rate": 1.9976152576849582e-05, "loss": 1.0684, "step": 251 }, { "epoch": 0.051671109288497025, "grad_norm": 1.6194430589675903, "learning_rate": 1.9975691954509347e-05, "loss": 1.0555, "step": 252 }, { "epoch": 0.05187615337297519, "grad_norm": 1.4402865171432495, "learning_rate": 1.9975226931482145e-05, "loss": 1.1187, "step": 253 }, { "epoch": 0.052081197457453356, "grad_norm": 1.6178096532821655, "learning_rate": 1.9974757507973105e-05, "loss": 1.0405, "step": 254 }, { "epoch": 0.05228624154193152, "grad_norm": 1.5344501733779907, "learning_rate": 1.9974283684189318e-05, "loss": 1.0797, "step": 255 }, { "epoch": 0.05249128562640968, "grad_norm": 1.5525314807891846, "learning_rate": 1.99738054603398e-05, "loss": 1.1154, "step": 256 }, { "epoch": 0.05269632971088784, "grad_norm": 1.4000139236450195, "learning_rate": 1.9973322836635517e-05, "loss": 1.0626, "step": 257 }, { "epoch": 0.052901373795366004, "grad_norm": 1.5680042505264282, "learning_rate": 1.997283581328938e-05, "loss": 1.1213, "step": 258 }, { "epoch": 0.053106417879844166, "grad_norm": 1.448689341545105, "learning_rate": 1.9972344390516225e-05, "loss": 1.0567, "step": 259 }, { "epoch": 0.05331146196432233, "grad_norm": 1.5614596605300903, "learning_rate": 1.9971848568532846e-05, "loss": 1.0854, "step": 260 }, { "epoch": 0.05351650604880049, "grad_norm": 1.5466113090515137, "learning_rate": 1.997134834755797e-05, "loss": 1.0523, "step": 261 }, { "epoch": 0.05372155013327865, "grad_norm": 1.6164332628250122, "learning_rate": 1.997084372781226e-05, "loss": 1.0014, "step": 262 }, { "epoch": 0.05392659421775682, "grad_norm": 1.4846552610397339, "learning_rate": 1.9970334709518328e-05, "loss": 1.0384, "step": 263 }, { "epoch": 0.05413163830223498, "grad_norm": 1.6358990669250488, "learning_rate": 1.9969821292900728e-05, "loss": 1.0435, "step": 264 }, { "epoch": 0.054336682386713145, "grad_norm": 1.4836297035217285, "learning_rate": 1.996930347818594e-05, "loss": 1.1026, "step": 265 }, { "epoch": 0.05454172647119131, "grad_norm": 1.5060025453567505, "learning_rate": 1.9968781265602396e-05, "loss": 1.106, "step": 266 }, { "epoch": 0.05474677055566947, "grad_norm": 1.6240743398666382, "learning_rate": 1.9968254655380465e-05, "loss": 1.0982, "step": 267 }, { "epoch": 0.05495181464014763, "grad_norm": 1.7134194374084473, "learning_rate": 1.9967723647752463e-05, "loss": 1.1086, "step": 268 }, { "epoch": 0.05515685872462579, "grad_norm": 1.6179522275924683, "learning_rate": 1.996718824295263e-05, "loss": 1.0685, "step": 269 }, { "epoch": 0.055361902809103955, "grad_norm": 1.6325346231460571, "learning_rate": 1.996664844121716e-05, "loss": 1.069, "step": 270 }, { "epoch": 0.05556694689358212, "grad_norm": 1.5098854303359985, "learning_rate": 1.996610424278418e-05, "loss": 1.0821, "step": 271 }, { "epoch": 0.055771990978060286, "grad_norm": 1.4995557069778442, "learning_rate": 1.996555564789376e-05, "loss": 0.9302, "step": 272 }, { "epoch": 0.05597703506253845, "grad_norm": 1.6283340454101562, "learning_rate": 1.996500265678791e-05, "loss": 1.0289, "step": 273 }, { "epoch": 0.05618207914701661, "grad_norm": 1.7198522090911865, "learning_rate": 1.996444526971057e-05, "loss": 1.0497, "step": 274 }, { "epoch": 0.05638712323149477, "grad_norm": 1.5679795742034912, "learning_rate": 1.996388348690763e-05, "loss": 1.0747, "step": 275 }, { "epoch": 0.056592167315972934, "grad_norm": 1.6305418014526367, "learning_rate": 1.9963317308626916e-05, "loss": 1.0867, "step": 276 }, { "epoch": 0.056797211400451096, "grad_norm": 1.5611238479614258, "learning_rate": 1.996274673511819e-05, "loss": 1.0312, "step": 277 }, { "epoch": 0.05700225548492926, "grad_norm": 1.446427583694458, "learning_rate": 1.9962171766633163e-05, "loss": 1.025, "step": 278 }, { "epoch": 0.05720729956940742, "grad_norm": 1.5114107131958008, "learning_rate": 1.996159240342547e-05, "loss": 0.9649, "step": 279 }, { "epoch": 0.05741234365388558, "grad_norm": 1.4507813453674316, "learning_rate": 1.996100864575069e-05, "loss": 1.0904, "step": 280 }, { "epoch": 0.05761738773836375, "grad_norm": 1.4566558599472046, "learning_rate": 1.996042049386635e-05, "loss": 1.0219, "step": 281 }, { "epoch": 0.05782243182284191, "grad_norm": 1.506580114364624, "learning_rate": 1.99598279480319e-05, "loss": 1.0435, "step": 282 }, { "epoch": 0.058027475907320075, "grad_norm": 1.5476516485214233, "learning_rate": 1.9959231008508742e-05, "loss": 1.0342, "step": 283 }, { "epoch": 0.05823251999179824, "grad_norm": 1.4561588764190674, "learning_rate": 1.995862967556021e-05, "loss": 0.9776, "step": 284 }, { "epoch": 0.0584375640762764, "grad_norm": 1.6844754219055176, "learning_rate": 1.9958023949451574e-05, "loss": 1.0192, "step": 285 }, { "epoch": 0.05864260816075456, "grad_norm": 1.584230661392212, "learning_rate": 1.9957413830450047e-05, "loss": 0.9672, "step": 286 }, { "epoch": 0.05884765224523272, "grad_norm": 1.5372557640075684, "learning_rate": 1.9956799318824776e-05, "loss": 1.0793, "step": 287 }, { "epoch": 0.059052696329710885, "grad_norm": 1.5792317390441895, "learning_rate": 1.9956180414846847e-05, "loss": 1.1001, "step": 288 }, { "epoch": 0.05925774041418905, "grad_norm": 1.5517557859420776, "learning_rate": 1.9955557118789284e-05, "loss": 1.1065, "step": 289 }, { "epoch": 0.059462784498667216, "grad_norm": 1.5979465246200562, "learning_rate": 1.995492943092705e-05, "loss": 1.0495, "step": 290 }, { "epoch": 0.05966782858314538, "grad_norm": 1.4850809574127197, "learning_rate": 1.9954297351537045e-05, "loss": 1.0772, "step": 291 }, { "epoch": 0.05987287266762354, "grad_norm": 1.5550060272216797, "learning_rate": 1.99536608808981e-05, "loss": 1.1002, "step": 292 }, { "epoch": 0.0600779167521017, "grad_norm": 1.7222014665603638, "learning_rate": 1.995302001929099e-05, "loss": 0.9926, "step": 293 }, { "epoch": 0.060282960836579864, "grad_norm": 1.481752634048462, "learning_rate": 1.995237476699843e-05, "loss": 1.095, "step": 294 }, { "epoch": 0.060488004921058026, "grad_norm": 1.611399531364441, "learning_rate": 1.9951725124305062e-05, "loss": 1.0615, "step": 295 }, { "epoch": 0.06069304900553619, "grad_norm": 1.5148794651031494, "learning_rate": 1.9951071091497475e-05, "loss": 1.0463, "step": 296 }, { "epoch": 0.06089809309001435, "grad_norm": 1.6011245250701904, "learning_rate": 1.995041266886419e-05, "loss": 1.0736, "step": 297 }, { "epoch": 0.06110313717449251, "grad_norm": 1.5603957176208496, "learning_rate": 1.9949749856695656e-05, "loss": 1.0461, "step": 298 }, { "epoch": 0.06130818125897068, "grad_norm": 1.4951330423355103, "learning_rate": 1.994908265528427e-05, "loss": 1.0296, "step": 299 }, { "epoch": 0.061513225343448844, "grad_norm": 1.5896416902542114, "learning_rate": 1.9948411064924368e-05, "loss": 1.0746, "step": 300 }, { "epoch": 0.061718269427927006, "grad_norm": 1.6931248903274536, "learning_rate": 1.994773508591221e-05, "loss": 1.0268, "step": 301 }, { "epoch": 0.06192331351240517, "grad_norm": 1.5581387281417847, "learning_rate": 1.9947054718545996e-05, "loss": 1.0349, "step": 302 }, { "epoch": 0.06212835759688333, "grad_norm": 1.4620722532272339, "learning_rate": 1.9946369963125875e-05, "loss": 1.0355, "step": 303 }, { "epoch": 0.06233340168136149, "grad_norm": 1.5284706354141235, "learning_rate": 1.9945680819953907e-05, "loss": 1.0875, "step": 304 }, { "epoch": 0.06253844576583965, "grad_norm": 1.570785403251648, "learning_rate": 1.9944987289334113e-05, "loss": 0.991, "step": 305 }, { "epoch": 0.06274348985031782, "grad_norm": 2.0823607444763184, "learning_rate": 1.9944289371572427e-05, "loss": 1.0521, "step": 306 }, { "epoch": 0.06294853393479598, "grad_norm": 1.6560684442520142, "learning_rate": 1.994358706697674e-05, "loss": 1.0815, "step": 307 }, { "epoch": 0.06315357801927414, "grad_norm": 1.6102004051208496, "learning_rate": 1.9942880375856857e-05, "loss": 1.0337, "step": 308 }, { "epoch": 0.0633586221037523, "grad_norm": 1.6298236846923828, "learning_rate": 1.994216929852454e-05, "loss": 0.9823, "step": 309 }, { "epoch": 0.06356366618823046, "grad_norm": 1.5943448543548584, "learning_rate": 1.9941453835293464e-05, "loss": 1.1412, "step": 310 }, { "epoch": 0.06376871027270863, "grad_norm": 1.547037124633789, "learning_rate": 1.9940733986479254e-05, "loss": 1.097, "step": 311 }, { "epoch": 0.0639737543571868, "grad_norm": 1.5343213081359863, "learning_rate": 1.9940009752399462e-05, "loss": 1.0839, "step": 312 }, { "epoch": 0.06417879844166496, "grad_norm": 1.5677183866500854, "learning_rate": 1.993928113337358e-05, "loss": 1.1255, "step": 313 }, { "epoch": 0.06438384252614313, "grad_norm": 1.6525754928588867, "learning_rate": 1.9938548129723032e-05, "loss": 1.0356, "step": 314 }, { "epoch": 0.06458888661062129, "grad_norm": 1.4789077043533325, "learning_rate": 1.993781074177117e-05, "loss": 1.0443, "step": 315 }, { "epoch": 0.06479393069509945, "grad_norm": 1.5483832359313965, "learning_rate": 1.99370689698433e-05, "loss": 0.9744, "step": 316 }, { "epoch": 0.06499897477957761, "grad_norm": 1.497331142425537, "learning_rate": 1.9936322814266634e-05, "loss": 1.0382, "step": 317 }, { "epoch": 0.06520401886405577, "grad_norm": 1.6753716468811035, "learning_rate": 1.993557227537034e-05, "loss": 1.0028, "step": 318 }, { "epoch": 0.06540906294853394, "grad_norm": 1.493648886680603, "learning_rate": 1.99348173534855e-05, "loss": 1.0961, "step": 319 }, { "epoch": 0.0656141070330121, "grad_norm": 1.579384684562683, "learning_rate": 1.993405804894516e-05, "loss": 1.0949, "step": 320 }, { "epoch": 0.06581915111749026, "grad_norm": 1.5949441194534302, "learning_rate": 1.9933294362084265e-05, "loss": 1.125, "step": 321 }, { "epoch": 0.06602419520196842, "grad_norm": 1.5266457796096802, "learning_rate": 1.9932526293239713e-05, "loss": 0.9536, "step": 322 }, { "epoch": 0.06622923928644658, "grad_norm": 1.5226621627807617, "learning_rate": 1.993175384275033e-05, "loss": 1.0201, "step": 323 }, { "epoch": 0.06643428337092475, "grad_norm": 1.5282865762710571, "learning_rate": 1.9930977010956883e-05, "loss": 1.0849, "step": 324 }, { "epoch": 0.06663932745540291, "grad_norm": 1.5923144817352295, "learning_rate": 1.993019579820205e-05, "loss": 1.0008, "step": 325 }, { "epoch": 0.06684437153988107, "grad_norm": 1.4608021974563599, "learning_rate": 1.992941020483047e-05, "loss": 1.0203, "step": 326 }, { "epoch": 0.06704941562435923, "grad_norm": 1.470996379852295, "learning_rate": 1.9928620231188694e-05, "loss": 0.9478, "step": 327 }, { "epoch": 0.0672544597088374, "grad_norm": 1.3502256870269775, "learning_rate": 1.992782587762521e-05, "loss": 1.0175, "step": 328 }, { "epoch": 0.06745950379331556, "grad_norm": 1.4576836824417114, "learning_rate": 1.9927027144490446e-05, "loss": 0.9943, "step": 329 }, { "epoch": 0.06766454787779373, "grad_norm": 1.5400248765945435, "learning_rate": 1.9926224032136747e-05, "loss": 1.0047, "step": 330 }, { "epoch": 0.0678695919622719, "grad_norm": 1.5958186388015747, "learning_rate": 1.992541654091841e-05, "loss": 0.9373, "step": 331 }, { "epoch": 0.06807463604675006, "grad_norm": 1.6097279787063599, "learning_rate": 1.992460467119164e-05, "loss": 1.1215, "step": 332 }, { "epoch": 0.06827968013122822, "grad_norm": 1.427406907081604, "learning_rate": 1.9923788423314596e-05, "loss": 0.9805, "step": 333 }, { "epoch": 0.06848472421570638, "grad_norm": 1.630497694015503, "learning_rate": 1.9922967797647357e-05, "loss": 1.0992, "step": 334 }, { "epoch": 0.06868976830018454, "grad_norm": 1.5736985206604004, "learning_rate": 1.992214279455193e-05, "loss": 1.0227, "step": 335 }, { "epoch": 0.0688948123846627, "grad_norm": 1.5421696901321411, "learning_rate": 1.992131341439226e-05, "loss": 1.036, "step": 336 }, { "epoch": 0.06909985646914087, "grad_norm": 1.6403664350509644, "learning_rate": 1.992047965753422e-05, "loss": 0.9832, "step": 337 }, { "epoch": 0.06930490055361903, "grad_norm": 1.418555498123169, "learning_rate": 1.991964152434562e-05, "loss": 0.9934, "step": 338 }, { "epoch": 0.06950994463809719, "grad_norm": 1.5252553224563599, "learning_rate": 1.9918799015196185e-05, "loss": 1.0685, "step": 339 }, { "epoch": 0.06971498872257535, "grad_norm": 1.5414422750473022, "learning_rate": 1.9917952130457592e-05, "loss": 0.9693, "step": 340 }, { "epoch": 0.06992003280705351, "grad_norm": 1.7203998565673828, "learning_rate": 1.9917100870503427e-05, "loss": 0.9868, "step": 341 }, { "epoch": 0.07012507689153168, "grad_norm": 1.6154650449752808, "learning_rate": 1.991624523570922e-05, "loss": 1.1116, "step": 342 }, { "epoch": 0.07033012097600984, "grad_norm": 1.616958498954773, "learning_rate": 1.991538522645242e-05, "loss": 0.9953, "step": 343 }, { "epoch": 0.070535165060488, "grad_norm": 1.549331545829773, "learning_rate": 1.9914520843112423e-05, "loss": 1.1157, "step": 344 }, { "epoch": 0.07074020914496616, "grad_norm": 1.4299054145812988, "learning_rate": 1.9913652086070535e-05, "loss": 1.1035, "step": 345 }, { "epoch": 0.07094525322944432, "grad_norm": 1.4222180843353271, "learning_rate": 1.991277895571001e-05, "loss": 1.0112, "step": 346 }, { "epoch": 0.07115029731392249, "grad_norm": 1.4630300998687744, "learning_rate": 1.9911901452416012e-05, "loss": 1.0112, "step": 347 }, { "epoch": 0.07135534139840066, "grad_norm": 1.4896178245544434, "learning_rate": 1.991101957657565e-05, "loss": 1.0123, "step": 348 }, { "epoch": 0.07156038548287882, "grad_norm": 1.5042457580566406, "learning_rate": 1.991013332857795e-05, "loss": 1.1812, "step": 349 }, { "epoch": 0.07176542956735699, "grad_norm": 1.476757526397705, "learning_rate": 1.9909242708813878e-05, "loss": 1.1213, "step": 350 }, { "epoch": 0.07197047365183515, "grad_norm": 1.5540159940719604, "learning_rate": 1.9908347717676318e-05, "loss": 1.0605, "step": 351 }, { "epoch": 0.07217551773631331, "grad_norm": 1.5421314239501953, "learning_rate": 1.9907448355560094e-05, "loss": 1.1114, "step": 352 }, { "epoch": 0.07238056182079147, "grad_norm": 1.5160667896270752, "learning_rate": 1.9906544622861944e-05, "loss": 1.0217, "step": 353 }, { "epoch": 0.07258560590526963, "grad_norm": 1.6242144107818604, "learning_rate": 1.9905636519980546e-05, "loss": 1.0729, "step": 354 }, { "epoch": 0.0727906499897478, "grad_norm": 1.384382724761963, "learning_rate": 1.9904724047316495e-05, "loss": 1.0713, "step": 355 }, { "epoch": 0.07299569407422596, "grad_norm": 1.4977489709854126, "learning_rate": 1.9903807205272333e-05, "loss": 1.0148, "step": 356 }, { "epoch": 0.07320073815870412, "grad_norm": 1.695237636566162, "learning_rate": 1.9902885994252506e-05, "loss": 1.0337, "step": 357 }, { "epoch": 0.07340578224318228, "grad_norm": 1.4282013177871704, "learning_rate": 1.99019604146634e-05, "loss": 1.0301, "step": 358 }, { "epoch": 0.07361082632766044, "grad_norm": 1.6131590604782104, "learning_rate": 1.9901030466913333e-05, "loss": 1.0585, "step": 359 }, { "epoch": 0.0738158704121386, "grad_norm": 1.4545314311981201, "learning_rate": 1.990009615141253e-05, "loss": 1.0172, "step": 360 }, { "epoch": 0.07402091449661677, "grad_norm": 1.3618910312652588, "learning_rate": 1.9899157468573165e-05, "loss": 0.9796, "step": 361 }, { "epoch": 0.07422595858109493, "grad_norm": 1.407371163368225, "learning_rate": 1.989821441880933e-05, "loss": 1.0584, "step": 362 }, { "epoch": 0.07443100266557309, "grad_norm": 1.5841455459594727, "learning_rate": 1.9897267002537036e-05, "loss": 1.0707, "step": 363 }, { "epoch": 0.07463604675005125, "grad_norm": 1.4612938165664673, "learning_rate": 1.989631522017424e-05, "loss": 1.0985, "step": 364 }, { "epoch": 0.07484109083452943, "grad_norm": 1.393827199935913, "learning_rate": 1.9895359072140797e-05, "loss": 0.9501, "step": 365 }, { "epoch": 0.07504613491900759, "grad_norm": 1.4273533821105957, "learning_rate": 1.9894398558858513e-05, "loss": 0.9492, "step": 366 }, { "epoch": 0.07525117900348575, "grad_norm": 1.645163893699646, "learning_rate": 1.9893433680751105e-05, "loss": 1.1085, "step": 367 }, { "epoch": 0.07545622308796392, "grad_norm": 1.4539247751235962, "learning_rate": 1.9892464438244223e-05, "loss": 0.9708, "step": 368 }, { "epoch": 0.07566126717244208, "grad_norm": 1.514987587928772, "learning_rate": 1.989149083176544e-05, "loss": 1.0696, "step": 369 }, { "epoch": 0.07586631125692024, "grad_norm": 1.4892765283584595, "learning_rate": 1.989051286174425e-05, "loss": 0.9856, "step": 370 }, { "epoch": 0.0760713553413984, "grad_norm": 1.536570429801941, "learning_rate": 1.988953052861208e-05, "loss": 1.0687, "step": 371 }, { "epoch": 0.07627639942587656, "grad_norm": 1.5202889442443848, "learning_rate": 1.9888543832802277e-05, "loss": 1.0801, "step": 372 }, { "epoch": 0.07648144351035473, "grad_norm": 1.544765591621399, "learning_rate": 1.988755277475011e-05, "loss": 0.9772, "step": 373 }, { "epoch": 0.07668648759483289, "grad_norm": 1.522325873374939, "learning_rate": 1.9886557354892777e-05, "loss": 1.0802, "step": 374 }, { "epoch": 0.07689153167931105, "grad_norm": 1.5024863481521606, "learning_rate": 1.98855575736694e-05, "loss": 1.0184, "step": 375 }, { "epoch": 0.07709657576378921, "grad_norm": 1.4701449871063232, "learning_rate": 1.9884553431521024e-05, "loss": 1.0375, "step": 376 }, { "epoch": 0.07730161984826737, "grad_norm": 1.5133960247039795, "learning_rate": 1.9883544928890612e-05, "loss": 0.9965, "step": 377 }, { "epoch": 0.07750666393274554, "grad_norm": 1.6016494035720825, "learning_rate": 1.988253206622306e-05, "loss": 1.0081, "step": 378 }, { "epoch": 0.0777117080172237, "grad_norm": 1.418439269065857, "learning_rate": 1.9881514843965185e-05, "loss": 1.0172, "step": 379 }, { "epoch": 0.07791675210170186, "grad_norm": 1.4121787548065186, "learning_rate": 1.9880493262565724e-05, "loss": 1.0183, "step": 380 }, { "epoch": 0.07812179618618002, "grad_norm": 1.6000322103500366, "learning_rate": 1.9879467322475334e-05, "loss": 1.0506, "step": 381 }, { "epoch": 0.07832684027065819, "grad_norm": 1.5708181858062744, "learning_rate": 1.9878437024146603e-05, "loss": 1.0173, "step": 382 }, { "epoch": 0.07853188435513636, "grad_norm": 1.5645391941070557, "learning_rate": 1.987740236803404e-05, "loss": 0.9731, "step": 383 }, { "epoch": 0.07873692843961452, "grad_norm": 1.5638173818588257, "learning_rate": 1.987636335459407e-05, "loss": 1.0812, "step": 384 }, { "epoch": 0.07894197252409269, "grad_norm": 1.5853748321533203, "learning_rate": 1.987531998428505e-05, "loss": 1.0309, "step": 385 }, { "epoch": 0.07914701660857085, "grad_norm": 1.4507609605789185, "learning_rate": 1.9874272257567243e-05, "loss": 0.9727, "step": 386 }, { "epoch": 0.07935206069304901, "grad_norm": 1.566808819770813, "learning_rate": 1.9873220174902857e-05, "loss": 1.0676, "step": 387 }, { "epoch": 0.07955710477752717, "grad_norm": 1.6446616649627686, "learning_rate": 1.9872163736756e-05, "loss": 1.0476, "step": 388 }, { "epoch": 0.07976214886200533, "grad_norm": 1.628389596939087, "learning_rate": 1.9871102943592717e-05, "loss": 1.0564, "step": 389 }, { "epoch": 0.0799671929464835, "grad_norm": 1.5101680755615234, "learning_rate": 1.9870037795880962e-05, "loss": 1.0964, "step": 390 }, { "epoch": 0.08017223703096166, "grad_norm": 1.5730286836624146, "learning_rate": 1.9868968294090617e-05, "loss": 1.0253, "step": 391 }, { "epoch": 0.08037728111543982, "grad_norm": 1.4599535465240479, "learning_rate": 1.986789443869348e-05, "loss": 1.0741, "step": 392 }, { "epoch": 0.08058232519991798, "grad_norm": 1.5081744194030762, "learning_rate": 1.9866816230163278e-05, "loss": 1.0634, "step": 393 }, { "epoch": 0.08078736928439614, "grad_norm": 1.3710395097732544, "learning_rate": 1.9865733668975653e-05, "loss": 0.9933, "step": 394 }, { "epoch": 0.0809924133688743, "grad_norm": 1.452233910560608, "learning_rate": 1.986464675560816e-05, "loss": 1.0287, "step": 395 }, { "epoch": 0.08119745745335247, "grad_norm": 1.523262858390808, "learning_rate": 1.986355549054029e-05, "loss": 1.0235, "step": 396 }, { "epoch": 0.08140250153783063, "grad_norm": 1.5054924488067627, "learning_rate": 1.9862459874253438e-05, "loss": 1.0087, "step": 397 }, { "epoch": 0.08160754562230879, "grad_norm": 1.535515308380127, "learning_rate": 1.986135990723093e-05, "loss": 1.0747, "step": 398 }, { "epoch": 0.08181258970678695, "grad_norm": 1.5440744161605835, "learning_rate": 1.9860255589958008e-05, "loss": 1.0141, "step": 399 }, { "epoch": 0.08201763379126512, "grad_norm": 1.493699073791504, "learning_rate": 1.985914692292182e-05, "loss": 1.0442, "step": 400 }, { "epoch": 0.08222267787574329, "grad_norm": 1.6218425035476685, "learning_rate": 1.9858033906611458e-05, "loss": 1.1083, "step": 401 }, { "epoch": 0.08242772196022145, "grad_norm": 1.4994029998779297, "learning_rate": 1.985691654151791e-05, "loss": 0.9731, "step": 402 }, { "epoch": 0.08263276604469962, "grad_norm": 1.5255396366119385, "learning_rate": 1.98557948281341e-05, "loss": 1.0089, "step": 403 }, { "epoch": 0.08283781012917778, "grad_norm": 1.4672237634658813, "learning_rate": 1.9854668766954856e-05, "loss": 1.04, "step": 404 }, { "epoch": 0.08304285421365594, "grad_norm": 1.444637656211853, "learning_rate": 1.9853538358476933e-05, "loss": 0.9651, "step": 405 }, { "epoch": 0.0832478982981341, "grad_norm": 1.5379564762115479, "learning_rate": 1.9852403603198994e-05, "loss": 0.9489, "step": 406 }, { "epoch": 0.08345294238261226, "grad_norm": 1.4553699493408203, "learning_rate": 1.9851264501621635e-05, "loss": 1.0638, "step": 407 }, { "epoch": 0.08365798646709043, "grad_norm": 1.5500562191009521, "learning_rate": 1.9850121054247353e-05, "loss": 1.0546, "step": 408 }, { "epoch": 0.08386303055156859, "grad_norm": 1.4928547143936157, "learning_rate": 1.9848973261580575e-05, "loss": 1.1077, "step": 409 }, { "epoch": 0.08406807463604675, "grad_norm": 1.6979098320007324, "learning_rate": 1.9847821124127638e-05, "loss": 1.1127, "step": 410 }, { "epoch": 0.08427311872052491, "grad_norm": 1.470423698425293, "learning_rate": 1.9846664642396793e-05, "loss": 1.029, "step": 411 }, { "epoch": 0.08447816280500307, "grad_norm": 1.584097981452942, "learning_rate": 1.984550381689822e-05, "loss": 1.0907, "step": 412 }, { "epoch": 0.08468320688948124, "grad_norm": 1.4839553833007812, "learning_rate": 1.9844338648143993e-05, "loss": 1.0092, "step": 413 }, { "epoch": 0.0848882509739594, "grad_norm": 1.4527688026428223, "learning_rate": 1.984316913664813e-05, "loss": 1.0626, "step": 414 }, { "epoch": 0.08509329505843756, "grad_norm": 1.2951841354370117, "learning_rate": 1.9841995282926545e-05, "loss": 0.9894, "step": 415 }, { "epoch": 0.08529833914291572, "grad_norm": 1.4556200504302979, "learning_rate": 1.984081708749707e-05, "loss": 1.0387, "step": 416 }, { "epoch": 0.08550338322739388, "grad_norm": 1.4940464496612549, "learning_rate": 1.983963455087946e-05, "loss": 1.0, "step": 417 }, { "epoch": 0.08570842731187205, "grad_norm": 1.5325560569763184, "learning_rate": 1.983844767359538e-05, "loss": 1.0845, "step": 418 }, { "epoch": 0.08591347139635022, "grad_norm": 1.4316673278808594, "learning_rate": 1.9837256456168408e-05, "loss": 0.9856, "step": 419 }, { "epoch": 0.08611851548082838, "grad_norm": 1.4618996381759644, "learning_rate": 1.983606089912404e-05, "loss": 1.0497, "step": 420 }, { "epoch": 0.08632355956530655, "grad_norm": 1.3947030305862427, "learning_rate": 1.9834861002989683e-05, "loss": 1.0387, "step": 421 }, { "epoch": 0.08652860364978471, "grad_norm": 1.5020296573638916, "learning_rate": 1.983365676829466e-05, "loss": 1.0739, "step": 422 }, { "epoch": 0.08673364773426287, "grad_norm": 1.4588593244552612, "learning_rate": 1.983244819557022e-05, "loss": 1.0595, "step": 423 }, { "epoch": 0.08693869181874103, "grad_norm": 1.3970704078674316, "learning_rate": 1.9831235285349496e-05, "loss": 0.9636, "step": 424 }, { "epoch": 0.0871437359032192, "grad_norm": 1.5711934566497803, "learning_rate": 1.9830018038167563e-05, "loss": 1.0951, "step": 425 }, { "epoch": 0.08734877998769736, "grad_norm": 1.607322096824646, "learning_rate": 1.9828796454561398e-05, "loss": 1.0923, "step": 426 }, { "epoch": 0.08755382407217552, "grad_norm": 1.4291521310806274, "learning_rate": 1.982757053506989e-05, "loss": 1.0907, "step": 427 }, { "epoch": 0.08775886815665368, "grad_norm": 1.4423432350158691, "learning_rate": 1.9826340280233844e-05, "loss": 1.0203, "step": 428 }, { "epoch": 0.08796391224113184, "grad_norm": 1.5424449443817139, "learning_rate": 1.9825105690595975e-05, "loss": 1.0053, "step": 429 }, { "epoch": 0.08816895632561, "grad_norm": 1.5657621622085571, "learning_rate": 1.982386676670091e-05, "loss": 1.0052, "step": 430 }, { "epoch": 0.08837400041008817, "grad_norm": 1.5202655792236328, "learning_rate": 1.982262350909519e-05, "loss": 1.0881, "step": 431 }, { "epoch": 0.08857904449456633, "grad_norm": 1.5306557416915894, "learning_rate": 1.9821375918327268e-05, "loss": 1.0788, "step": 432 }, { "epoch": 0.08878408857904449, "grad_norm": 1.516223430633545, "learning_rate": 1.9820123994947505e-05, "loss": 1.0988, "step": 433 }, { "epoch": 0.08898913266352265, "grad_norm": 1.4348759651184082, "learning_rate": 1.9818867739508177e-05, "loss": 1.0542, "step": 434 }, { "epoch": 0.08919417674800081, "grad_norm": 1.5361237525939941, "learning_rate": 1.9817607152563472e-05, "loss": 1.0964, "step": 435 }, { "epoch": 0.08939922083247898, "grad_norm": 1.5103068351745605, "learning_rate": 1.9816342234669482e-05, "loss": 0.9663, "step": 436 }, { "epoch": 0.08960426491695715, "grad_norm": 1.3677356243133545, "learning_rate": 1.981507298638422e-05, "loss": 0.9983, "step": 437 }, { "epoch": 0.08980930900143531, "grad_norm": 1.6008367538452148, "learning_rate": 1.9813799408267598e-05, "loss": 1.0839, "step": 438 }, { "epoch": 0.09001435308591348, "grad_norm": 1.3973186016082764, "learning_rate": 1.981252150088145e-05, "loss": 0.9354, "step": 439 }, { "epoch": 0.09021939717039164, "grad_norm": 1.4950793981552124, "learning_rate": 1.9811239264789504e-05, "loss": 0.9601, "step": 440 }, { "epoch": 0.0904244412548698, "grad_norm": 1.479514479637146, "learning_rate": 1.980995270055741e-05, "loss": 1.0288, "step": 441 }, { "epoch": 0.09062948533934796, "grad_norm": 1.5111435651779175, "learning_rate": 1.9808661808752735e-05, "loss": 0.9775, "step": 442 }, { "epoch": 0.09083452942382612, "grad_norm": 1.5741885900497437, "learning_rate": 1.980736658994493e-05, "loss": 1.0733, "step": 443 }, { "epoch": 0.09103957350830429, "grad_norm": 1.6076592206954956, "learning_rate": 1.9806067044705375e-05, "loss": 1.0399, "step": 444 }, { "epoch": 0.09124461759278245, "grad_norm": 1.4595375061035156, "learning_rate": 1.9804763173607354e-05, "loss": 0.9251, "step": 445 }, { "epoch": 0.09144966167726061, "grad_norm": 1.5212647914886475, "learning_rate": 1.9803454977226057e-05, "loss": 0.9896, "step": 446 }, { "epoch": 0.09165470576173877, "grad_norm": 1.3784384727478027, "learning_rate": 1.980214245613858e-05, "loss": 0.9906, "step": 447 }, { "epoch": 0.09185974984621693, "grad_norm": 1.501894474029541, "learning_rate": 1.9800825610923937e-05, "loss": 1.0605, "step": 448 }, { "epoch": 0.0920647939306951, "grad_norm": 1.3499666452407837, "learning_rate": 1.9799504442163037e-05, "loss": 0.9732, "step": 449 }, { "epoch": 0.09226983801517326, "grad_norm": 1.473997950553894, "learning_rate": 1.9798178950438702e-05, "loss": 1.0409, "step": 450 }, { "epoch": 0.09247488209965142, "grad_norm": 1.4975616931915283, "learning_rate": 1.979684913633566e-05, "loss": 1.0975, "step": 451 }, { "epoch": 0.09267992618412958, "grad_norm": 1.3739979267120361, "learning_rate": 1.979551500044055e-05, "loss": 1.0059, "step": 452 }, { "epoch": 0.09288497026860774, "grad_norm": 1.612618327140808, "learning_rate": 1.9794176543341914e-05, "loss": 1.0582, "step": 453 }, { "epoch": 0.09309001435308592, "grad_norm": 1.4669066667556763, "learning_rate": 1.9792833765630193e-05, "loss": 1.0232, "step": 454 }, { "epoch": 0.09329505843756408, "grad_norm": 1.5819953680038452, "learning_rate": 1.979148666789775e-05, "loss": 1.0295, "step": 455 }, { "epoch": 0.09350010252204224, "grad_norm": 1.3856197595596313, "learning_rate": 1.9790135250738843e-05, "loss": 0.9533, "step": 456 }, { "epoch": 0.0937051466065204, "grad_norm": 1.4127904176712036, "learning_rate": 1.9788779514749635e-05, "loss": 1.0054, "step": 457 }, { "epoch": 0.09391019069099857, "grad_norm": 1.4193931818008423, "learning_rate": 1.97874194605282e-05, "loss": 0.9286, "step": 458 }, { "epoch": 0.09411523477547673, "grad_norm": 1.7053059339523315, "learning_rate": 1.9786055088674514e-05, "loss": 1.0326, "step": 459 }, { "epoch": 0.09432027885995489, "grad_norm": 1.4834471940994263, "learning_rate": 1.9784686399790453e-05, "loss": 0.9453, "step": 460 }, { "epoch": 0.09452532294443305, "grad_norm": 1.446616768836975, "learning_rate": 1.9783313394479804e-05, "loss": 1.0609, "step": 461 }, { "epoch": 0.09473036702891122, "grad_norm": 1.3646531105041504, "learning_rate": 1.978193607334826e-05, "loss": 1.0934, "step": 462 }, { "epoch": 0.09493541111338938, "grad_norm": 1.5618114471435547, "learning_rate": 1.9780554437003402e-05, "loss": 1.0662, "step": 463 }, { "epoch": 0.09514045519786754, "grad_norm": 1.368046760559082, "learning_rate": 1.977916848605474e-05, "loss": 0.9289, "step": 464 }, { "epoch": 0.0953454992823457, "grad_norm": 1.41494619846344, "learning_rate": 1.9777778221113667e-05, "loss": 0.9922, "step": 465 }, { "epoch": 0.09555054336682386, "grad_norm": 1.4514696598052979, "learning_rate": 1.977638364279349e-05, "loss": 1.0396, "step": 466 }, { "epoch": 0.09575558745130203, "grad_norm": 1.3683501482009888, "learning_rate": 1.977498475170941e-05, "loss": 1.1052, "step": 467 }, { "epoch": 0.09596063153578019, "grad_norm": 1.4899985790252686, "learning_rate": 1.9773581548478534e-05, "loss": 1.0881, "step": 468 }, { "epoch": 0.09616567562025835, "grad_norm": 1.559800386428833, "learning_rate": 1.9772174033719883e-05, "loss": 1.0407, "step": 469 }, { "epoch": 0.09637071970473651, "grad_norm": 1.4429481029510498, "learning_rate": 1.9770762208054358e-05, "loss": 1.0813, "step": 470 }, { "epoch": 0.09657576378921467, "grad_norm": 1.6168667078018188, "learning_rate": 1.9769346072104777e-05, "loss": 0.9858, "step": 471 }, { "epoch": 0.09678080787369285, "grad_norm": 1.437160611152649, "learning_rate": 1.9767925626495857e-05, "loss": 1.0374, "step": 472 }, { "epoch": 0.09698585195817101, "grad_norm": 1.505539894104004, "learning_rate": 1.9766500871854216e-05, "loss": 1.067, "step": 473 }, { "epoch": 0.09719089604264917, "grad_norm": 1.4109601974487305, "learning_rate": 1.9765071808808365e-05, "loss": 0.9528, "step": 474 }, { "epoch": 0.09739594012712734, "grad_norm": 1.4648572206497192, "learning_rate": 1.9763638437988732e-05, "loss": 1.0164, "step": 475 }, { "epoch": 0.0976009842116055, "grad_norm": 1.4685672521591187, "learning_rate": 1.9762200760027626e-05, "loss": 1.0294, "step": 476 }, { "epoch": 0.09780602829608366, "grad_norm": 1.3597288131713867, "learning_rate": 1.9760758775559275e-05, "loss": 1.0606, "step": 477 }, { "epoch": 0.09801107238056182, "grad_norm": 1.4537463188171387, "learning_rate": 1.9759312485219787e-05, "loss": 0.9668, "step": 478 }, { "epoch": 0.09821611646503998, "grad_norm": 1.5006306171417236, "learning_rate": 1.975786188964719e-05, "loss": 1.0781, "step": 479 }, { "epoch": 0.09842116054951815, "grad_norm": 1.4371659755706787, "learning_rate": 1.9756406989481398e-05, "loss": 0.9594, "step": 480 }, { "epoch": 0.09862620463399631, "grad_norm": 1.6345086097717285, "learning_rate": 1.9754947785364224e-05, "loss": 1.1195, "step": 481 }, { "epoch": 0.09883124871847447, "grad_norm": 1.4773435592651367, "learning_rate": 1.975348427793939e-05, "loss": 1.0344, "step": 482 }, { "epoch": 0.09903629280295263, "grad_norm": 1.526402473449707, "learning_rate": 1.97520164678525e-05, "loss": 1.0705, "step": 483 }, { "epoch": 0.0992413368874308, "grad_norm": 1.3998318910598755, "learning_rate": 1.975054435575107e-05, "loss": 1.1018, "step": 484 }, { "epoch": 0.09944638097190896, "grad_norm": 1.5282282829284668, "learning_rate": 1.974906794228451e-05, "loss": 1.0816, "step": 485 }, { "epoch": 0.09965142505638712, "grad_norm": 1.3764220476150513, "learning_rate": 1.974758722810412e-05, "loss": 0.976, "step": 486 }, { "epoch": 0.09985646914086528, "grad_norm": 1.4538031816482544, "learning_rate": 1.9746102213863113e-05, "loss": 0.9698, "step": 487 }, { "epoch": 0.10006151322534344, "grad_norm": 1.5212188959121704, "learning_rate": 1.9744612900216588e-05, "loss": 0.9927, "step": 488 }, { "epoch": 0.1002665573098216, "grad_norm": 1.5121041536331177, "learning_rate": 1.9743119287821537e-05, "loss": 1.1095, "step": 489 }, { "epoch": 0.10047160139429978, "grad_norm": 1.4456270933151245, "learning_rate": 1.9741621377336856e-05, "loss": 1.0494, "step": 490 }, { "epoch": 0.10067664547877794, "grad_norm": 1.5466268062591553, "learning_rate": 1.9740119169423337e-05, "loss": 0.9953, "step": 491 }, { "epoch": 0.1008816895632561, "grad_norm": 1.4161945581436157, "learning_rate": 1.973861266474366e-05, "loss": 1.0473, "step": 492 }, { "epoch": 0.10108673364773427, "grad_norm": 1.5893555879592896, "learning_rate": 1.973710186396242e-05, "loss": 1.0511, "step": 493 }, { "epoch": 0.10129177773221243, "grad_norm": 1.5507982969284058, "learning_rate": 1.9735586767746072e-05, "loss": 1.0561, "step": 494 }, { "epoch": 0.10149682181669059, "grad_norm": 1.372351050376892, "learning_rate": 1.9734067376763004e-05, "loss": 1.0654, "step": 495 }, { "epoch": 0.10170186590116875, "grad_norm": 1.3648560047149658, "learning_rate": 1.9732543691683475e-05, "loss": 0.9923, "step": 496 }, { "epoch": 0.10190690998564692, "grad_norm": 1.4787548780441284, "learning_rate": 1.9731015713179643e-05, "loss": 0.998, "step": 497 }, { "epoch": 0.10211195407012508, "grad_norm": 1.5620167255401611, "learning_rate": 1.972948344192557e-05, "loss": 1.0128, "step": 498 }, { "epoch": 0.10231699815460324, "grad_norm": 1.4270232915878296, "learning_rate": 1.9727946878597193e-05, "loss": 1.0165, "step": 499 }, { "epoch": 0.1025220422390814, "grad_norm": 1.733803153038025, "learning_rate": 1.9726406023872368e-05, "loss": 1.0064, "step": 500 }, { "epoch": 0.10272708632355956, "grad_norm": 1.5237818956375122, "learning_rate": 1.9724860878430814e-05, "loss": 1.0659, "step": 501 }, { "epoch": 0.10293213040803773, "grad_norm": 1.7158706188201904, "learning_rate": 1.9723311442954163e-05, "loss": 0.9561, "step": 502 }, { "epoch": 0.10313717449251589, "grad_norm": 1.3859798908233643, "learning_rate": 1.9721757718125934e-05, "loss": 1.0811, "step": 503 }, { "epoch": 0.10334221857699405, "grad_norm": 1.496572494506836, "learning_rate": 1.9720199704631546e-05, "loss": 1.0486, "step": 504 }, { "epoch": 0.10354726266147221, "grad_norm": 1.5522085428237915, "learning_rate": 1.9718637403158297e-05, "loss": 1.0249, "step": 505 }, { "epoch": 0.10375230674595037, "grad_norm": 1.4271234273910522, "learning_rate": 1.9717070814395378e-05, "loss": 1.0155, "step": 506 }, { "epoch": 0.10395735083042854, "grad_norm": 1.348629117012024, "learning_rate": 1.9715499939033883e-05, "loss": 0.9143, "step": 507 }, { "epoch": 0.10416239491490671, "grad_norm": 1.3935680389404297, "learning_rate": 1.971392477776678e-05, "loss": 0.9096, "step": 508 }, { "epoch": 0.10436743899938487, "grad_norm": 1.490793228149414, "learning_rate": 1.9712345331288952e-05, "loss": 1.0047, "step": 509 }, { "epoch": 0.10457248308386304, "grad_norm": 1.4103447198867798, "learning_rate": 1.9710761600297147e-05, "loss": 0.9984, "step": 510 }, { "epoch": 0.1047775271683412, "grad_norm": 1.3761626482009888, "learning_rate": 1.9709173585490017e-05, "loss": 0.9682, "step": 511 }, { "epoch": 0.10498257125281936, "grad_norm": 1.4523767232894897, "learning_rate": 1.9707581287568094e-05, "loss": 0.9689, "step": 512 }, { "epoch": 0.10518761533729752, "grad_norm": 1.4180318117141724, "learning_rate": 1.970598470723382e-05, "loss": 1.0667, "step": 513 }, { "epoch": 0.10539265942177568, "grad_norm": 1.4825658798217773, "learning_rate": 1.97043838451915e-05, "loss": 1.0243, "step": 514 }, { "epoch": 0.10559770350625385, "grad_norm": 1.5177512168884277, "learning_rate": 1.9702778702147342e-05, "loss": 0.9698, "step": 515 }, { "epoch": 0.10580274759073201, "grad_norm": 1.446925163269043, "learning_rate": 1.9701169278809442e-05, "loss": 1.0141, "step": 516 }, { "epoch": 0.10600779167521017, "grad_norm": 1.4586217403411865, "learning_rate": 1.969955557588778e-05, "loss": 1.0387, "step": 517 }, { "epoch": 0.10621283575968833, "grad_norm": 1.5739994049072266, "learning_rate": 1.9697937594094233e-05, "loss": 0.9846, "step": 518 }, { "epoch": 0.1064178798441665, "grad_norm": 1.430864930152893, "learning_rate": 1.969631533414256e-05, "loss": 0.9627, "step": 519 }, { "epoch": 0.10662292392864466, "grad_norm": 1.398065447807312, "learning_rate": 1.9694688796748393e-05, "loss": 0.9873, "step": 520 }, { "epoch": 0.10682796801312282, "grad_norm": 1.5213243961334229, "learning_rate": 1.9693057982629277e-05, "loss": 0.9808, "step": 521 }, { "epoch": 0.10703301209760098, "grad_norm": 1.415981411933899, "learning_rate": 1.9691422892504626e-05, "loss": 1.0108, "step": 522 }, { "epoch": 0.10723805618207914, "grad_norm": 1.4257097244262695, "learning_rate": 1.9689783527095748e-05, "loss": 1.0171, "step": 523 }, { "epoch": 0.1074431002665573, "grad_norm": 1.4383496046066284, "learning_rate": 1.968813988712583e-05, "loss": 1.0131, "step": 524 }, { "epoch": 0.10764814435103547, "grad_norm": 1.5926874876022339, "learning_rate": 1.9686491973319953e-05, "loss": 1.0624, "step": 525 }, { "epoch": 0.10785318843551364, "grad_norm": 1.3990124464035034, "learning_rate": 1.9684839786405082e-05, "loss": 1.0195, "step": 526 }, { "epoch": 0.1080582325199918, "grad_norm": 1.477275013923645, "learning_rate": 1.968318332711006e-05, "loss": 1.0801, "step": 527 }, { "epoch": 0.10826327660446997, "grad_norm": 1.4530106782913208, "learning_rate": 1.9681522596165615e-05, "loss": 0.9671, "step": 528 }, { "epoch": 0.10846832068894813, "grad_norm": 1.4229207038879395, "learning_rate": 1.967985759430437e-05, "loss": 1.0405, "step": 529 }, { "epoch": 0.10867336477342629, "grad_norm": 1.4738223552703857, "learning_rate": 1.9678188322260828e-05, "loss": 0.9745, "step": 530 }, { "epoch": 0.10887840885790445, "grad_norm": 1.559181571006775, "learning_rate": 1.9676514780771364e-05, "loss": 0.9999, "step": 531 }, { "epoch": 0.10908345294238261, "grad_norm": 1.4503504037857056, "learning_rate": 1.9674836970574253e-05, "loss": 1.0675, "step": 532 }, { "epoch": 0.10928849702686078, "grad_norm": 1.4990558624267578, "learning_rate": 1.9673154892409645e-05, "loss": 0.9834, "step": 533 }, { "epoch": 0.10949354111133894, "grad_norm": 1.5804951190948486, "learning_rate": 1.9671468547019575e-05, "loss": 1.0333, "step": 534 }, { "epoch": 0.1096985851958171, "grad_norm": 1.5068517923355103, "learning_rate": 1.9669777935147955e-05, "loss": 1.058, "step": 535 }, { "epoch": 0.10990362928029526, "grad_norm": 1.5603541135787964, "learning_rate": 1.966808305754058e-05, "loss": 1.0677, "step": 536 }, { "epoch": 0.11010867336477342, "grad_norm": 1.3914179801940918, "learning_rate": 1.966638391494514e-05, "loss": 0.9732, "step": 537 }, { "epoch": 0.11031371744925159, "grad_norm": 1.5007535219192505, "learning_rate": 1.9664680508111193e-05, "loss": 0.9715, "step": 538 }, { "epoch": 0.11051876153372975, "grad_norm": 1.5556191205978394, "learning_rate": 1.9662972837790176e-05, "loss": 1.008, "step": 539 }, { "epoch": 0.11072380561820791, "grad_norm": 1.4767568111419678, "learning_rate": 1.9661260904735422e-05, "loss": 1.0782, "step": 540 }, { "epoch": 0.11092884970268607, "grad_norm": 1.5211390256881714, "learning_rate": 1.9659544709702127e-05, "loss": 1.0338, "step": 541 }, { "epoch": 0.11113389378716423, "grad_norm": 1.4908878803253174, "learning_rate": 1.9657824253447378e-05, "loss": 1.0558, "step": 542 }, { "epoch": 0.11133893787164241, "grad_norm": 1.3833034038543701, "learning_rate": 1.965609953673014e-05, "loss": 0.9971, "step": 543 }, { "epoch": 0.11154398195612057, "grad_norm": 1.5602205991744995, "learning_rate": 1.965437056031125e-05, "loss": 0.9964, "step": 544 }, { "epoch": 0.11174902604059873, "grad_norm": 1.4317163228988647, "learning_rate": 1.965263732495344e-05, "loss": 0.9945, "step": 545 }, { "epoch": 0.1119540701250769, "grad_norm": 1.4552747011184692, "learning_rate": 1.9650899831421302e-05, "loss": 0.9923, "step": 546 }, { "epoch": 0.11215911420955506, "grad_norm": 1.4863455295562744, "learning_rate": 1.9649158080481327e-05, "loss": 1.0857, "step": 547 }, { "epoch": 0.11236415829403322, "grad_norm": 1.3648884296417236, "learning_rate": 1.9647412072901863e-05, "loss": 0.9007, "step": 548 }, { "epoch": 0.11256920237851138, "grad_norm": 1.4798994064331055, "learning_rate": 1.9645661809453145e-05, "loss": 1.0012, "step": 549 }, { "epoch": 0.11277424646298954, "grad_norm": 1.5307090282440186, "learning_rate": 1.9643907290907292e-05, "loss": 1.0332, "step": 550 }, { "epoch": 0.1129792905474677, "grad_norm": 1.5267423391342163, "learning_rate": 1.964214851803829e-05, "loss": 0.971, "step": 551 }, { "epoch": 0.11318433463194587, "grad_norm": 1.524130940437317, "learning_rate": 1.964038549162201e-05, "loss": 1.1295, "step": 552 }, { "epoch": 0.11338937871642403, "grad_norm": 1.5156323909759521, "learning_rate": 1.963861821243619e-05, "loss": 1.0146, "step": 553 }, { "epoch": 0.11359442280090219, "grad_norm": 1.4097462892532349, "learning_rate": 1.963684668126046e-05, "loss": 1.0467, "step": 554 }, { "epoch": 0.11379946688538035, "grad_norm": 1.4109396934509277, "learning_rate": 1.96350708988763e-05, "loss": 1.0272, "step": 555 }, { "epoch": 0.11400451096985852, "grad_norm": 1.3423645496368408, "learning_rate": 1.963329086606709e-05, "loss": 0.9699, "step": 556 }, { "epoch": 0.11420955505433668, "grad_norm": 1.4466307163238525, "learning_rate": 1.963150658361807e-05, "loss": 1.0753, "step": 557 }, { "epoch": 0.11441459913881484, "grad_norm": 1.4457483291625977, "learning_rate": 1.9629718052316365e-05, "loss": 0.9168, "step": 558 }, { "epoch": 0.114619643223293, "grad_norm": 1.4597246646881104, "learning_rate": 1.962792527295097e-05, "loss": 1.0886, "step": 559 }, { "epoch": 0.11482468730777116, "grad_norm": 1.4614853858947754, "learning_rate": 1.962612824631275e-05, "loss": 1.0331, "step": 560 }, { "epoch": 0.11502973139224934, "grad_norm": 1.588301658630371, "learning_rate": 1.9624326973194448e-05, "loss": 1.0304, "step": 561 }, { "epoch": 0.1152347754767275, "grad_norm": 1.2604318857192993, "learning_rate": 1.962252145439068e-05, "loss": 0.9964, "step": 562 }, { "epoch": 0.11543981956120566, "grad_norm": 1.503849983215332, "learning_rate": 1.9620711690697936e-05, "loss": 1.0126, "step": 563 }, { "epoch": 0.11564486364568383, "grad_norm": 1.4587541818618774, "learning_rate": 1.9618897682914576e-05, "loss": 1.0982, "step": 564 }, { "epoch": 0.11584990773016199, "grad_norm": 1.4515243768692017, "learning_rate": 1.961707943184083e-05, "loss": 1.0315, "step": 565 }, { "epoch": 0.11605495181464015, "grad_norm": 1.5895812511444092, "learning_rate": 1.961525693827881e-05, "loss": 1.0065, "step": 566 }, { "epoch": 0.11625999589911831, "grad_norm": 1.5462688207626343, "learning_rate": 1.9613430203032486e-05, "loss": 0.9706, "step": 567 }, { "epoch": 0.11646503998359647, "grad_norm": 1.4238090515136719, "learning_rate": 1.961159922690771e-05, "loss": 1.0222, "step": 568 }, { "epoch": 0.11667008406807464, "grad_norm": 1.4306422472000122, "learning_rate": 1.9609764010712197e-05, "loss": 0.9379, "step": 569 }, { "epoch": 0.1168751281525528, "grad_norm": 1.4894821643829346, "learning_rate": 1.960792455525554e-05, "loss": 0.9687, "step": 570 }, { "epoch": 0.11708017223703096, "grad_norm": 1.4744130373001099, "learning_rate": 1.9606080861349194e-05, "loss": 1.0669, "step": 571 }, { "epoch": 0.11728521632150912, "grad_norm": 1.3763957023620605, "learning_rate": 1.9604232929806493e-05, "loss": 1.0207, "step": 572 }, { "epoch": 0.11749026040598728, "grad_norm": 1.393510103225708, "learning_rate": 1.9602380761442632e-05, "loss": 1.0745, "step": 573 }, { "epoch": 0.11769530449046545, "grad_norm": 1.4931776523590088, "learning_rate": 1.960052435707468e-05, "loss": 1.0154, "step": 574 }, { "epoch": 0.11790034857494361, "grad_norm": 1.4898720979690552, "learning_rate": 1.959866371752157e-05, "loss": 1.0303, "step": 575 }, { "epoch": 0.11810539265942177, "grad_norm": 1.4896934032440186, "learning_rate": 1.9596798843604114e-05, "loss": 1.0508, "step": 576 }, { "epoch": 0.11831043674389993, "grad_norm": 1.4025113582611084, "learning_rate": 1.9594929736144978e-05, "loss": 1.0108, "step": 577 }, { "epoch": 0.1185154808283781, "grad_norm": 1.338534951210022, "learning_rate": 1.95930563959687e-05, "loss": 0.9723, "step": 578 }, { "epoch": 0.11872052491285627, "grad_norm": 1.3448790311813354, "learning_rate": 1.959117882390169e-05, "loss": 1.0331, "step": 579 }, { "epoch": 0.11892556899733443, "grad_norm": 1.5760326385498047, "learning_rate": 1.9589297020772227e-05, "loss": 1.0636, "step": 580 }, { "epoch": 0.1191306130818126, "grad_norm": 1.4746993780136108, "learning_rate": 1.9587410987410446e-05, "loss": 0.9852, "step": 581 }, { "epoch": 0.11933565716629076, "grad_norm": 1.33079195022583, "learning_rate": 1.9585520724648354e-05, "loss": 0.9524, "step": 582 }, { "epoch": 0.11954070125076892, "grad_norm": 1.3502941131591797, "learning_rate": 1.9583626233319824e-05, "loss": 1.0258, "step": 583 }, { "epoch": 0.11974574533524708, "grad_norm": 1.4582306146621704, "learning_rate": 1.9581727514260597e-05, "loss": 1.0217, "step": 584 }, { "epoch": 0.11995078941972524, "grad_norm": 1.4491610527038574, "learning_rate": 1.957982456830827e-05, "loss": 1.1226, "step": 585 }, { "epoch": 0.1201558335042034, "grad_norm": 1.4811660051345825, "learning_rate": 1.9577917396302312e-05, "loss": 0.9368, "step": 586 }, { "epoch": 0.12036087758868157, "grad_norm": 1.4954718351364136, "learning_rate": 1.957600599908406e-05, "loss": 0.9833, "step": 587 }, { "epoch": 0.12056592167315973, "grad_norm": 1.3913569450378418, "learning_rate": 1.9574090377496705e-05, "loss": 0.9287, "step": 588 }, { "epoch": 0.12077096575763789, "grad_norm": 1.3077023029327393, "learning_rate": 1.957217053238531e-05, "loss": 0.942, "step": 589 }, { "epoch": 0.12097600984211605, "grad_norm": 1.5148335695266724, "learning_rate": 1.9570246464596793e-05, "loss": 1.012, "step": 590 }, { "epoch": 0.12118105392659421, "grad_norm": 1.4744688272476196, "learning_rate": 1.956831817497994e-05, "loss": 0.9591, "step": 591 }, { "epoch": 0.12138609801107238, "grad_norm": 1.373390555381775, "learning_rate": 1.95663856643854e-05, "loss": 0.9948, "step": 592 }, { "epoch": 0.12159114209555054, "grad_norm": 1.4243934154510498, "learning_rate": 1.956444893366568e-05, "loss": 1.0781, "step": 593 }, { "epoch": 0.1217961861800287, "grad_norm": 1.5546865463256836, "learning_rate": 1.9562507983675157e-05, "loss": 1.0142, "step": 594 }, { "epoch": 0.12200123026450686, "grad_norm": 1.339085578918457, "learning_rate": 1.9560562815270062e-05, "loss": 0.9547, "step": 595 }, { "epoch": 0.12220627434898503, "grad_norm": 1.345416784286499, "learning_rate": 1.9558613429308483e-05, "loss": 0.9587, "step": 596 }, { "epoch": 0.1224113184334632, "grad_norm": 1.3705790042877197, "learning_rate": 1.955665982665038e-05, "loss": 0.9746, "step": 597 }, { "epoch": 0.12261636251794136, "grad_norm": 1.3492779731750488, "learning_rate": 1.9554702008157567e-05, "loss": 0.9786, "step": 598 }, { "epoch": 0.12282140660241953, "grad_norm": 1.34085214138031, "learning_rate": 1.9552739974693714e-05, "loss": 0.9167, "step": 599 }, { "epoch": 0.12302645068689769, "grad_norm": 1.417086124420166, "learning_rate": 1.955077372712436e-05, "loss": 1.0081, "step": 600 }, { "epoch": 0.12323149477137585, "grad_norm": 1.3571921586990356, "learning_rate": 1.9548803266316893e-05, "loss": 1.0547, "step": 601 }, { "epoch": 0.12343653885585401, "grad_norm": 1.5409044027328491, "learning_rate": 1.9546828593140565e-05, "loss": 0.993, "step": 602 }, { "epoch": 0.12364158294033217, "grad_norm": 1.372724175453186, "learning_rate": 1.9544849708466486e-05, "loss": 0.955, "step": 603 }, { "epoch": 0.12384662702481034, "grad_norm": 1.4150279760360718, "learning_rate": 1.954286661316762e-05, "loss": 1.0928, "step": 604 }, { "epoch": 0.1240516711092885, "grad_norm": 1.3397700786590576, "learning_rate": 1.95408793081188e-05, "loss": 0.9699, "step": 605 }, { "epoch": 0.12425671519376666, "grad_norm": 1.522552251815796, "learning_rate": 1.95388877941967e-05, "loss": 1.122, "step": 606 }, { "epoch": 0.12446175927824482, "grad_norm": 1.2490665912628174, "learning_rate": 1.9536892072279863e-05, "loss": 1.0468, "step": 607 }, { "epoch": 0.12466680336272298, "grad_norm": 1.3811465501785278, "learning_rate": 1.953489214324868e-05, "loss": 1.0509, "step": 608 }, { "epoch": 0.12487184744720115, "grad_norm": 1.3717812299728394, "learning_rate": 1.9532888007985408e-05, "loss": 1.0574, "step": 609 }, { "epoch": 0.1250768915316793, "grad_norm": 1.548738718032837, "learning_rate": 1.9530879667374144e-05, "loss": 1.0622, "step": 610 }, { "epoch": 0.12528193561615747, "grad_norm": 1.46402907371521, "learning_rate": 1.9528867122300853e-05, "loss": 1.061, "step": 611 }, { "epoch": 0.12548697970063563, "grad_norm": 1.4120066165924072, "learning_rate": 1.9526850373653356e-05, "loss": 0.9639, "step": 612 }, { "epoch": 0.1256920237851138, "grad_norm": 1.775253176689148, "learning_rate": 1.952482942232132e-05, "loss": 1.042, "step": 613 }, { "epoch": 0.12589706786959196, "grad_norm": 1.470005750656128, "learning_rate": 1.952280426919627e-05, "loss": 1.0522, "step": 614 }, { "epoch": 0.12610211195407012, "grad_norm": 1.5282288789749146, "learning_rate": 1.952077491517158e-05, "loss": 1.018, "step": 615 }, { "epoch": 0.12630715603854828, "grad_norm": 1.4503083229064941, "learning_rate": 1.951874136114249e-05, "loss": 1.0559, "step": 616 }, { "epoch": 0.12651220012302644, "grad_norm": 1.3754431009292603, "learning_rate": 1.9516703608006074e-05, "loss": 1.01, "step": 617 }, { "epoch": 0.1267172442075046, "grad_norm": 1.45350980758667, "learning_rate": 1.951466165666128e-05, "loss": 0.9892, "step": 618 }, { "epoch": 0.12692228829198277, "grad_norm": 1.3685983419418335, "learning_rate": 1.951261550800889e-05, "loss": 0.9704, "step": 619 }, { "epoch": 0.12712733237646093, "grad_norm": 1.4136238098144531, "learning_rate": 1.9510565162951538e-05, "loss": 1.0644, "step": 620 }, { "epoch": 0.1273323764609391, "grad_norm": 1.404252290725708, "learning_rate": 1.9508510622393727e-05, "loss": 1.0401, "step": 621 }, { "epoch": 0.12753742054541725, "grad_norm": 1.5007519721984863, "learning_rate": 1.9506451887241787e-05, "loss": 1.0126, "step": 622 }, { "epoch": 0.12774246462989544, "grad_norm": 1.3502846956253052, "learning_rate": 1.9504388958403922e-05, "loss": 1.0366, "step": 623 }, { "epoch": 0.1279475087143736, "grad_norm": 1.5087530612945557, "learning_rate": 1.9502321836790173e-05, "loss": 1.0395, "step": 624 }, { "epoch": 0.12815255279885177, "grad_norm": 1.5362622737884521, "learning_rate": 1.9500250523312425e-05, "loss": 1.0072, "step": 625 }, { "epoch": 0.12835759688332993, "grad_norm": 1.5321781635284424, "learning_rate": 1.9498175018884424e-05, "loss": 0.9867, "step": 626 }, { "epoch": 0.1285626409678081, "grad_norm": 1.4133952856063843, "learning_rate": 1.949609532442176e-05, "loss": 0.9625, "step": 627 }, { "epoch": 0.12876768505228625, "grad_norm": 1.515829086303711, "learning_rate": 1.9494011440841872e-05, "loss": 0.9935, "step": 628 }, { "epoch": 0.1289727291367644, "grad_norm": 1.3934962749481201, "learning_rate": 1.9491923369064044e-05, "loss": 1.0512, "step": 629 }, { "epoch": 0.12917777322124258, "grad_norm": 1.4790562391281128, "learning_rate": 1.9489831110009413e-05, "loss": 0.9424, "step": 630 }, { "epoch": 0.12938281730572074, "grad_norm": 1.4916555881500244, "learning_rate": 1.9487734664600956e-05, "loss": 1.0375, "step": 631 }, { "epoch": 0.1295878613901989, "grad_norm": 1.3426036834716797, "learning_rate": 1.9485634033763507e-05, "loss": 1.0706, "step": 632 }, { "epoch": 0.12979290547467706, "grad_norm": 1.3685039281845093, "learning_rate": 1.948352921842374e-05, "loss": 0.9937, "step": 633 }, { "epoch": 0.12999794955915522, "grad_norm": 1.3138877153396606, "learning_rate": 1.948142021951017e-05, "loss": 1.0518, "step": 634 }, { "epoch": 0.13020299364363339, "grad_norm": 1.4120690822601318, "learning_rate": 1.9479307037953162e-05, "loss": 1.0615, "step": 635 }, { "epoch": 0.13040803772811155, "grad_norm": 1.5628479719161987, "learning_rate": 1.9477189674684938e-05, "loss": 0.9456, "step": 636 }, { "epoch": 0.1306130818125897, "grad_norm": 1.4051995277404785, "learning_rate": 1.9475068130639543e-05, "loss": 1.0256, "step": 637 }, { "epoch": 0.13081812589706787, "grad_norm": 1.312741756439209, "learning_rate": 1.947294240675288e-05, "loss": 0.9441, "step": 638 }, { "epoch": 0.13102316998154603, "grad_norm": 1.4234555959701538, "learning_rate": 1.94708125039627e-05, "loss": 1.025, "step": 639 }, { "epoch": 0.1312282140660242, "grad_norm": 1.3700093030929565, "learning_rate": 1.946867842320858e-05, "loss": 0.963, "step": 640 }, { "epoch": 0.13143325815050236, "grad_norm": 1.3963284492492676, "learning_rate": 1.9466540165431952e-05, "loss": 0.9924, "step": 641 }, { "epoch": 0.13163830223498052, "grad_norm": 1.5487116575241089, "learning_rate": 1.9464397731576093e-05, "loss": 0.9687, "step": 642 }, { "epoch": 0.13184334631945868, "grad_norm": 1.454546570777893, "learning_rate": 1.9462251122586123e-05, "loss": 1.0378, "step": 643 }, { "epoch": 0.13204839040393684, "grad_norm": 1.320212483406067, "learning_rate": 1.946010033940899e-05, "loss": 0.9921, "step": 644 }, { "epoch": 0.132253434488415, "grad_norm": 1.549343466758728, "learning_rate": 1.9457945382993498e-05, "loss": 1.1, "step": 645 }, { "epoch": 0.13245847857289317, "grad_norm": 1.4803204536437988, "learning_rate": 1.9455786254290285e-05, "loss": 0.9986, "step": 646 }, { "epoch": 0.13266352265737133, "grad_norm": 1.3925697803497314, "learning_rate": 1.945362295425183e-05, "loss": 1.0618, "step": 647 }, { "epoch": 0.1328685667418495, "grad_norm": 1.5591115951538086, "learning_rate": 1.9451455483832455e-05, "loss": 0.9617, "step": 648 }, { "epoch": 0.13307361082632765, "grad_norm": 1.5009831190109253, "learning_rate": 1.944928384398832e-05, "loss": 0.9854, "step": 649 }, { "epoch": 0.13327865491080582, "grad_norm": 1.3110395669937134, "learning_rate": 1.9447108035677428e-05, "loss": 1.0534, "step": 650 }, { "epoch": 0.13348369899528398, "grad_norm": 1.4140738248825073, "learning_rate": 1.9444928059859612e-05, "loss": 1.0702, "step": 651 }, { "epoch": 0.13368874307976214, "grad_norm": 1.3273261785507202, "learning_rate": 1.944274391749655e-05, "loss": 0.9893, "step": 652 }, { "epoch": 0.1338937871642403, "grad_norm": 1.3390535116195679, "learning_rate": 1.9440555609551758e-05, "loss": 0.9435, "step": 653 }, { "epoch": 0.13409883124871846, "grad_norm": 1.4752616882324219, "learning_rate": 1.9438363136990587e-05, "loss": 1.0807, "step": 654 }, { "epoch": 0.13430387533319663, "grad_norm": 1.4914573431015015, "learning_rate": 1.9436166500780226e-05, "loss": 1.0751, "step": 655 }, { "epoch": 0.1345089194176748, "grad_norm": 1.7217131853103638, "learning_rate": 1.9433965701889706e-05, "loss": 1.061, "step": 656 }, { "epoch": 0.13471396350215295, "grad_norm": 1.57137930393219, "learning_rate": 1.9431760741289886e-05, "loss": 1.0123, "step": 657 }, { "epoch": 0.1349190075866311, "grad_norm": 1.3895421028137207, "learning_rate": 1.9429551619953464e-05, "loss": 0.9596, "step": 658 }, { "epoch": 0.1351240516711093, "grad_norm": 1.358385682106018, "learning_rate": 1.9427338338854975e-05, "loss": 1.0161, "step": 659 }, { "epoch": 0.13532909575558746, "grad_norm": 1.3503797054290771, "learning_rate": 1.9425120898970788e-05, "loss": 1.0587, "step": 660 }, { "epoch": 0.13553413984006563, "grad_norm": 1.3671783208847046, "learning_rate": 1.9422899301279107e-05, "loss": 1.0991, "step": 661 }, { "epoch": 0.1357391839245438, "grad_norm": 1.361470103263855, "learning_rate": 1.942067354675997e-05, "loss": 0.9365, "step": 662 }, { "epoch": 0.13594422800902195, "grad_norm": 1.411510944366455, "learning_rate": 1.941844363639525e-05, "loss": 1.0408, "step": 663 }, { "epoch": 0.1361492720935001, "grad_norm": 1.540542721748352, "learning_rate": 1.9416209571168648e-05, "loss": 0.9879, "step": 664 }, { "epoch": 0.13635431617797827, "grad_norm": 1.4908039569854736, "learning_rate": 1.9413971352065702e-05, "loss": 0.9449, "step": 665 }, { "epoch": 0.13655936026245644, "grad_norm": 1.439727783203125, "learning_rate": 1.941172898007379e-05, "loss": 1.0625, "step": 666 }, { "epoch": 0.1367644043469346, "grad_norm": 1.3872582912445068, "learning_rate": 1.9409482456182105e-05, "loss": 0.9253, "step": 667 }, { "epoch": 0.13696944843141276, "grad_norm": 1.4980907440185547, "learning_rate": 1.9407231781381684e-05, "loss": 1.0094, "step": 668 }, { "epoch": 0.13717449251589092, "grad_norm": 1.5742045640945435, "learning_rate": 1.9404976956665396e-05, "loss": 0.9988, "step": 669 }, { "epoch": 0.13737953660036908, "grad_norm": 1.4613648653030396, "learning_rate": 1.940271798302793e-05, "loss": 1.0171, "step": 670 }, { "epoch": 0.13758458068484725, "grad_norm": 1.495627760887146, "learning_rate": 1.940045486146582e-05, "loss": 1.0152, "step": 671 }, { "epoch": 0.1377896247693254, "grad_norm": 1.4482388496398926, "learning_rate": 1.939818759297741e-05, "loss": 0.954, "step": 672 }, { "epoch": 0.13799466885380357, "grad_norm": 1.4430444240570068, "learning_rate": 1.9395916178562896e-05, "loss": 0.9884, "step": 673 }, { "epoch": 0.13819971293828173, "grad_norm": 1.5085092782974243, "learning_rate": 1.9393640619224286e-05, "loss": 0.9963, "step": 674 }, { "epoch": 0.1384047570227599, "grad_norm": 1.448380470275879, "learning_rate": 1.9391360915965426e-05, "loss": 0.99, "step": 675 }, { "epoch": 0.13860980110723806, "grad_norm": 1.455114483833313, "learning_rate": 1.9389077069791985e-05, "loss": 1.0654, "step": 676 }, { "epoch": 0.13881484519171622, "grad_norm": 1.4496926069259644, "learning_rate": 1.9386789081711465e-05, "loss": 0.9529, "step": 677 }, { "epoch": 0.13901988927619438, "grad_norm": 1.5361669063568115, "learning_rate": 1.9384496952733185e-05, "loss": 1.0384, "step": 678 }, { "epoch": 0.13922493336067254, "grad_norm": 1.5646703243255615, "learning_rate": 1.9382200683868298e-05, "loss": 1.09, "step": 679 }, { "epoch": 0.1394299774451507, "grad_norm": 1.4651259183883667, "learning_rate": 1.9379900276129787e-05, "loss": 1.0648, "step": 680 }, { "epoch": 0.13963502152962887, "grad_norm": 1.448067545890808, "learning_rate": 1.9377595730532454e-05, "loss": 1.0514, "step": 681 }, { "epoch": 0.13984006561410703, "grad_norm": 1.4279927015304565, "learning_rate": 1.9375287048092927e-05, "loss": 0.9953, "step": 682 }, { "epoch": 0.1400451096985852, "grad_norm": 1.5247936248779297, "learning_rate": 1.9372974229829666e-05, "loss": 1.0179, "step": 683 }, { "epoch": 0.14025015378306335, "grad_norm": 1.3555008172988892, "learning_rate": 1.937065727676294e-05, "loss": 1.0179, "step": 684 }, { "epoch": 0.14045519786754151, "grad_norm": 1.4209119081497192, "learning_rate": 1.9368336189914864e-05, "loss": 0.9161, "step": 685 }, { "epoch": 0.14066024195201968, "grad_norm": 1.5028246641159058, "learning_rate": 1.9366010970309355e-05, "loss": 1.0492, "step": 686 }, { "epoch": 0.14086528603649784, "grad_norm": 1.3365483283996582, "learning_rate": 1.9363681618972166e-05, "loss": 0.9207, "step": 687 }, { "epoch": 0.141070330120976, "grad_norm": 1.3784081935882568, "learning_rate": 1.936134813693087e-05, "loss": 0.9963, "step": 688 }, { "epoch": 0.14127537420545416, "grad_norm": 1.4087315797805786, "learning_rate": 1.9359010525214864e-05, "loss": 1.0509, "step": 689 }, { "epoch": 0.14148041828993232, "grad_norm": 1.3553178310394287, "learning_rate": 1.935666878485536e-05, "loss": 0.968, "step": 690 }, { "epoch": 0.1416854623744105, "grad_norm": 1.3886158466339111, "learning_rate": 1.93543229168854e-05, "loss": 0.9044, "step": 691 }, { "epoch": 0.14189050645888865, "grad_norm": 1.4283106327056885, "learning_rate": 1.9351972922339835e-05, "loss": 1.0553, "step": 692 }, { "epoch": 0.1420955505433668, "grad_norm": 1.427559494972229, "learning_rate": 1.934961880225535e-05, "loss": 1.007, "step": 693 }, { "epoch": 0.14230059462784497, "grad_norm": 1.3681126832962036, "learning_rate": 1.9347260557670447e-05, "loss": 1.0145, "step": 694 }, { "epoch": 0.14250563871232316, "grad_norm": 1.4436956644058228, "learning_rate": 1.9344898189625438e-05, "loss": 1.0711, "step": 695 }, { "epoch": 0.14271068279680132, "grad_norm": 1.4365707635879517, "learning_rate": 1.9342531699162467e-05, "loss": 1.0505, "step": 696 }, { "epoch": 0.1429157268812795, "grad_norm": 1.4215761423110962, "learning_rate": 1.9340161087325483e-05, "loss": 1.0625, "step": 697 }, { "epoch": 0.14312077096575765, "grad_norm": 1.4474525451660156, "learning_rate": 1.9337786355160262e-05, "loss": 1.0152, "step": 698 }, { "epoch": 0.1433258150502358, "grad_norm": 1.3449316024780273, "learning_rate": 1.9335407503714397e-05, "loss": 1.0152, "step": 699 }, { "epoch": 0.14353085913471397, "grad_norm": 1.4188958406448364, "learning_rate": 1.93330245340373e-05, "loss": 0.9028, "step": 700 }, { "epoch": 0.14373590321919213, "grad_norm": 1.4724253416061401, "learning_rate": 1.933063744718019e-05, "loss": 1.0075, "step": 701 }, { "epoch": 0.1439409473036703, "grad_norm": 1.4998283386230469, "learning_rate": 1.9328246244196117e-05, "loss": 0.9785, "step": 702 }, { "epoch": 0.14414599138814846, "grad_norm": 1.5223363637924194, "learning_rate": 1.9325850926139933e-05, "loss": 0.9054, "step": 703 }, { "epoch": 0.14435103547262662, "grad_norm": 1.413316249847412, "learning_rate": 1.9323451494068313e-05, "loss": 1.0106, "step": 704 }, { "epoch": 0.14455607955710478, "grad_norm": 1.5525649785995483, "learning_rate": 1.932104794903974e-05, "loss": 1.0588, "step": 705 }, { "epoch": 0.14476112364158294, "grad_norm": 1.4478849172592163, "learning_rate": 1.9318640292114526e-05, "loss": 1.0312, "step": 706 }, { "epoch": 0.1449661677260611, "grad_norm": 1.2813167572021484, "learning_rate": 1.931622852435478e-05, "loss": 0.994, "step": 707 }, { "epoch": 0.14517121181053927, "grad_norm": 1.3285024166107178, "learning_rate": 1.9313812646824432e-05, "loss": 1.0273, "step": 708 }, { "epoch": 0.14537625589501743, "grad_norm": 1.318396806716919, "learning_rate": 1.9311392660589226e-05, "loss": 0.9916, "step": 709 }, { "epoch": 0.1455812999794956, "grad_norm": 1.3827990293502808, "learning_rate": 1.930896856671672e-05, "loss": 1.0117, "step": 710 }, { "epoch": 0.14578634406397376, "grad_norm": 1.4484456777572632, "learning_rate": 1.930654036627628e-05, "loss": 0.9562, "step": 711 }, { "epoch": 0.14599138814845192, "grad_norm": 1.3730283975601196, "learning_rate": 1.930410806033908e-05, "loss": 0.9588, "step": 712 }, { "epoch": 0.14619643223293008, "grad_norm": 1.3945765495300293, "learning_rate": 1.9301671649978114e-05, "loss": 0.9999, "step": 713 }, { "epoch": 0.14640147631740824, "grad_norm": 1.3280614614486694, "learning_rate": 1.9299231136268185e-05, "loss": 0.974, "step": 714 }, { "epoch": 0.1466065204018864, "grad_norm": 1.4261183738708496, "learning_rate": 1.9296786520285898e-05, "loss": 0.9903, "step": 715 }, { "epoch": 0.14681156448636457, "grad_norm": 1.5075476169586182, "learning_rate": 1.9294337803109675e-05, "loss": 1.0111, "step": 716 }, { "epoch": 0.14701660857084273, "grad_norm": 1.3194365501403809, "learning_rate": 1.929188498581975e-05, "loss": 0.9286, "step": 717 }, { "epoch": 0.1472216526553209, "grad_norm": 1.4215418100357056, "learning_rate": 1.9289428069498157e-05, "loss": 1.0002, "step": 718 }, { "epoch": 0.14742669673979905, "grad_norm": 1.5159329175949097, "learning_rate": 1.9286967055228744e-05, "loss": 1.1025, "step": 719 }, { "epoch": 0.1476317408242772, "grad_norm": 1.5690945386886597, "learning_rate": 1.9284501944097164e-05, "loss": 1.007, "step": 720 }, { "epoch": 0.14783678490875538, "grad_norm": 1.3981622457504272, "learning_rate": 1.928203273719088e-05, "loss": 0.9951, "step": 721 }, { "epoch": 0.14804182899323354, "grad_norm": 1.492071509361267, "learning_rate": 1.9279559435599164e-05, "loss": 0.999, "step": 722 }, { "epoch": 0.1482468730777117, "grad_norm": 1.374814748764038, "learning_rate": 1.9277082040413083e-05, "loss": 0.9273, "step": 723 }, { "epoch": 0.14845191716218986, "grad_norm": 1.4017668962478638, "learning_rate": 1.927460055272552e-05, "loss": 0.9985, "step": 724 }, { "epoch": 0.14865696124666802, "grad_norm": 1.518797755241394, "learning_rate": 1.9272114973631165e-05, "loss": 1.0761, "step": 725 }, { "epoch": 0.14886200533114619, "grad_norm": 1.3515702486038208, "learning_rate": 1.926962530422651e-05, "loss": 0.966, "step": 726 }, { "epoch": 0.14906704941562435, "grad_norm": 1.4166096448898315, "learning_rate": 1.926713154560984e-05, "loss": 0.9779, "step": 727 }, { "epoch": 0.1492720935001025, "grad_norm": 1.3748536109924316, "learning_rate": 1.9264633698881266e-05, "loss": 1.01, "step": 728 }, { "epoch": 0.14947713758458067, "grad_norm": 1.3783295154571533, "learning_rate": 1.9262131765142684e-05, "loss": 1.0828, "step": 729 }, { "epoch": 0.14968218166905886, "grad_norm": 1.5896005630493164, "learning_rate": 1.9259625745497803e-05, "loss": 1.0625, "step": 730 }, { "epoch": 0.14988722575353702, "grad_norm": 1.5413119792938232, "learning_rate": 1.925711564105213e-05, "loss": 1.016, "step": 731 }, { "epoch": 0.15009226983801519, "grad_norm": 1.2909663915634155, "learning_rate": 1.9254601452912972e-05, "loss": 0.9191, "step": 732 }, { "epoch": 0.15029731392249335, "grad_norm": 1.429635763168335, "learning_rate": 1.9252083182189447e-05, "loss": 1.0374, "step": 733 }, { "epoch": 0.1505023580069715, "grad_norm": 1.5145387649536133, "learning_rate": 1.9249560829992457e-05, "loss": 0.9949, "step": 734 }, { "epoch": 0.15070740209144967, "grad_norm": 1.4340344667434692, "learning_rate": 1.9247034397434727e-05, "loss": 0.9872, "step": 735 }, { "epoch": 0.15091244617592783, "grad_norm": 1.3264100551605225, "learning_rate": 1.924450388563076e-05, "loss": 1.0483, "step": 736 }, { "epoch": 0.151117490260406, "grad_norm": 1.4115225076675415, "learning_rate": 1.924196929569688e-05, "loss": 0.9912, "step": 737 }, { "epoch": 0.15132253434488416, "grad_norm": 1.3337599039077759, "learning_rate": 1.9239430628751187e-05, "loss": 1.0036, "step": 738 }, { "epoch": 0.15152757842936232, "grad_norm": 1.3442953824996948, "learning_rate": 1.92368878859136e-05, "loss": 1.0674, "step": 739 }, { "epoch": 0.15173262251384048, "grad_norm": 1.4255139827728271, "learning_rate": 1.9234341068305816e-05, "loss": 1.0603, "step": 740 }, { "epoch": 0.15193766659831864, "grad_norm": 1.2988852262496948, "learning_rate": 1.9231790177051354e-05, "loss": 0.9389, "step": 741 }, { "epoch": 0.1521427106827968, "grad_norm": 1.3921407461166382, "learning_rate": 1.922923521327551e-05, "loss": 0.9352, "step": 742 }, { "epoch": 0.15234775476727497, "grad_norm": 1.478826642036438, "learning_rate": 1.9226676178105382e-05, "loss": 1.0289, "step": 743 }, { "epoch": 0.15255279885175313, "grad_norm": 1.3897300958633423, "learning_rate": 1.922411307266987e-05, "loss": 1.0294, "step": 744 }, { "epoch": 0.1527578429362313, "grad_norm": 1.5523234605789185, "learning_rate": 1.9221545898099658e-05, "loss": 1.0632, "step": 745 }, { "epoch": 0.15296288702070945, "grad_norm": 1.402062177658081, "learning_rate": 1.921897465552724e-05, "loss": 1.0296, "step": 746 }, { "epoch": 0.15316793110518762, "grad_norm": 1.3866541385650635, "learning_rate": 1.9216399346086893e-05, "loss": 0.9427, "step": 747 }, { "epoch": 0.15337297518966578, "grad_norm": 1.4517161846160889, "learning_rate": 1.921381997091469e-05, "loss": 0.977, "step": 748 }, { "epoch": 0.15357801927414394, "grad_norm": 1.257747769355774, "learning_rate": 1.92112365311485e-05, "loss": 0.9226, "step": 749 }, { "epoch": 0.1537830633586221, "grad_norm": 1.3763848543167114, "learning_rate": 1.920864902792799e-05, "loss": 0.9879, "step": 750 }, { "epoch": 0.15398810744310026, "grad_norm": 1.2177491188049316, "learning_rate": 1.9206057462394606e-05, "loss": 0.9071, "step": 751 }, { "epoch": 0.15419315152757843, "grad_norm": 1.51632559299469, "learning_rate": 1.9203461835691596e-05, "loss": 1.0901, "step": 752 }, { "epoch": 0.1543981956120566, "grad_norm": 1.5547055006027222, "learning_rate": 1.9200862148964e-05, "loss": 0.996, "step": 753 }, { "epoch": 0.15460323969653475, "grad_norm": 1.4958828687667847, "learning_rate": 1.9198258403358642e-05, "loss": 0.9883, "step": 754 }, { "epoch": 0.1548082837810129, "grad_norm": 1.4549440145492554, "learning_rate": 1.9195650600024147e-05, "loss": 0.9702, "step": 755 }, { "epoch": 0.15501332786549107, "grad_norm": 1.4733927249908447, "learning_rate": 1.919303874011092e-05, "loss": 1.0301, "step": 756 }, { "epoch": 0.15521837194996924, "grad_norm": 1.4134684801101685, "learning_rate": 1.9190422824771158e-05, "loss": 1.0425, "step": 757 }, { "epoch": 0.1554234160344474, "grad_norm": 1.4485481977462769, "learning_rate": 1.918780285515885e-05, "loss": 0.9702, "step": 758 }, { "epoch": 0.15562846011892556, "grad_norm": 1.4636824131011963, "learning_rate": 1.9185178832429778e-05, "loss": 0.915, "step": 759 }, { "epoch": 0.15583350420340372, "grad_norm": 1.458162546157837, "learning_rate": 1.9182550757741497e-05, "loss": 0.8787, "step": 760 }, { "epoch": 0.15603854828788188, "grad_norm": 1.4550502300262451, "learning_rate": 1.9179918632253366e-05, "loss": 0.9024, "step": 761 }, { "epoch": 0.15624359237236005, "grad_norm": 1.3929989337921143, "learning_rate": 1.9177282457126515e-05, "loss": 1.0369, "step": 762 }, { "epoch": 0.1564486364568382, "grad_norm": 1.4786940813064575, "learning_rate": 1.9174642233523876e-05, "loss": 1.0708, "step": 763 }, { "epoch": 0.15665368054131637, "grad_norm": 1.339672327041626, "learning_rate": 1.917199796261016e-05, "loss": 1.0325, "step": 764 }, { "epoch": 0.15685872462579453, "grad_norm": 1.5450475215911865, "learning_rate": 1.916934964555186e-05, "loss": 0.9085, "step": 765 }, { "epoch": 0.15706376871027272, "grad_norm": 1.4401686191558838, "learning_rate": 1.9166697283517258e-05, "loss": 1.0309, "step": 766 }, { "epoch": 0.15726881279475088, "grad_norm": 1.485561728477478, "learning_rate": 1.9164040877676425e-05, "loss": 1.0584, "step": 767 }, { "epoch": 0.15747385687922905, "grad_norm": 1.3362407684326172, "learning_rate": 1.91613804292012e-05, "loss": 1.0583, "step": 768 }, { "epoch": 0.1576789009637072, "grad_norm": 1.3672751188278198, "learning_rate": 1.9158715939265228e-05, "loss": 0.9644, "step": 769 }, { "epoch": 0.15788394504818537, "grad_norm": 1.4653294086456299, "learning_rate": 1.9156047409043916e-05, "loss": 1.0184, "step": 770 }, { "epoch": 0.15808898913266353, "grad_norm": 1.3496290445327759, "learning_rate": 1.915337483971446e-05, "loss": 1.0577, "step": 771 }, { "epoch": 0.1582940332171417, "grad_norm": 1.3911995887756348, "learning_rate": 1.9150698232455853e-05, "loss": 0.9741, "step": 772 }, { "epoch": 0.15849907730161986, "grad_norm": 1.4014722108840942, "learning_rate": 1.9148017588448845e-05, "loss": 1.0316, "step": 773 }, { "epoch": 0.15870412138609802, "grad_norm": 1.3247557878494263, "learning_rate": 1.9145332908875984e-05, "loss": 0.9057, "step": 774 }, { "epoch": 0.15890916547057618, "grad_norm": 1.3914694786071777, "learning_rate": 1.9142644194921586e-05, "loss": 1.0694, "step": 775 }, { "epoch": 0.15911420955505434, "grad_norm": 1.4755656719207764, "learning_rate": 1.9139951447771756e-05, "loss": 0.9697, "step": 776 }, { "epoch": 0.1593192536395325, "grad_norm": 1.5497441291809082, "learning_rate": 1.913725466861438e-05, "loss": 0.9462, "step": 777 }, { "epoch": 0.15952429772401067, "grad_norm": 1.4007714986801147, "learning_rate": 1.9134553858639107e-05, "loss": 0.9639, "step": 778 }, { "epoch": 0.15972934180848883, "grad_norm": 1.3628324270248413, "learning_rate": 1.9131849019037387e-05, "loss": 0.9887, "step": 779 }, { "epoch": 0.159934385892967, "grad_norm": 1.4248710870742798, "learning_rate": 1.912914015100243e-05, "loss": 1.016, "step": 780 }, { "epoch": 0.16013942997744515, "grad_norm": 1.4082388877868652, "learning_rate": 1.912642725572923e-05, "loss": 0.9891, "step": 781 }, { "epoch": 0.16034447406192331, "grad_norm": 1.3936858177185059, "learning_rate": 1.9123710334414552e-05, "loss": 1.0531, "step": 782 }, { "epoch": 0.16054951814640148, "grad_norm": 1.3683582544326782, "learning_rate": 1.912098938825695e-05, "loss": 1.0525, "step": 783 }, { "epoch": 0.16075456223087964, "grad_norm": 1.330979347229004, "learning_rate": 1.911826441845674e-05, "loss": 0.9988, "step": 784 }, { "epoch": 0.1609596063153578, "grad_norm": 1.285231590270996, "learning_rate": 1.9115535426216018e-05, "loss": 0.9319, "step": 785 }, { "epoch": 0.16116465039983596, "grad_norm": 1.2878271341323853, "learning_rate": 1.911280241273865e-05, "loss": 0.9545, "step": 786 }, { "epoch": 0.16136969448431412, "grad_norm": 1.3590327501296997, "learning_rate": 1.911006537923029e-05, "loss": 0.9829, "step": 787 }, { "epoch": 0.1615747385687923, "grad_norm": 1.4957231283187866, "learning_rate": 1.9107324326898347e-05, "loss": 0.9131, "step": 788 }, { "epoch": 0.16177978265327045, "grad_norm": 1.3930892944335938, "learning_rate": 1.9104579256952018e-05, "loss": 1.0766, "step": 789 }, { "epoch": 0.1619848267377486, "grad_norm": 1.3657057285308838, "learning_rate": 1.9101830170602264e-05, "loss": 1.0176, "step": 790 }, { "epoch": 0.16218987082222677, "grad_norm": 1.5298970937728882, "learning_rate": 1.9099077069061817e-05, "loss": 1.0464, "step": 791 }, { "epoch": 0.16239491490670493, "grad_norm": 1.4518613815307617, "learning_rate": 1.9096319953545186e-05, "loss": 1.0124, "step": 792 }, { "epoch": 0.1625999589911831, "grad_norm": 1.5004230737686157, "learning_rate": 1.9093558825268647e-05, "loss": 0.9512, "step": 793 }, { "epoch": 0.16280500307566126, "grad_norm": 1.2947436571121216, "learning_rate": 1.9090793685450246e-05, "loss": 1.0135, "step": 794 }, { "epoch": 0.16301004716013942, "grad_norm": 1.37473726272583, "learning_rate": 1.9088024535309803e-05, "loss": 0.984, "step": 795 }, { "epoch": 0.16321509124461758, "grad_norm": 1.3565582036972046, "learning_rate": 1.9085251376068898e-05, "loss": 0.9992, "step": 796 }, { "epoch": 0.16342013532909574, "grad_norm": 1.415236234664917, "learning_rate": 1.908247420895089e-05, "loss": 1.0594, "step": 797 }, { "epoch": 0.1636251794135739, "grad_norm": 1.3737848997116089, "learning_rate": 1.90796930351809e-05, "loss": 0.9733, "step": 798 }, { "epoch": 0.16383022349805207, "grad_norm": 1.3810789585113525, "learning_rate": 1.907690785598582e-05, "loss": 1.0191, "step": 799 }, { "epoch": 0.16403526758253023, "grad_norm": 1.321390151977539, "learning_rate": 1.9074118672594298e-05, "loss": 0.922, "step": 800 }, { "epoch": 0.16424031166700842, "grad_norm": 1.5140111446380615, "learning_rate": 1.9071325486236773e-05, "loss": 1.0411, "step": 801 }, { "epoch": 0.16444535575148658, "grad_norm": 1.3552943468093872, "learning_rate": 1.9068528298145418e-05, "loss": 1.0086, "step": 802 }, { "epoch": 0.16465039983596474, "grad_norm": 1.4072961807250977, "learning_rate": 1.9065727109554198e-05, "loss": 0.9831, "step": 803 }, { "epoch": 0.1648554439204429, "grad_norm": 1.472770094871521, "learning_rate": 1.9062921921698824e-05, "loss": 0.9447, "step": 804 }, { "epoch": 0.16506048800492107, "grad_norm": 1.3597794771194458, "learning_rate": 1.9060112735816786e-05, "loss": 1.0351, "step": 805 }, { "epoch": 0.16526553208939923, "grad_norm": 1.477723479270935, "learning_rate": 1.9057299553147333e-05, "loss": 0.9759, "step": 806 }, { "epoch": 0.1654705761738774, "grad_norm": 1.3997788429260254, "learning_rate": 1.905448237493147e-05, "loss": 1.0387, "step": 807 }, { "epoch": 0.16567562025835555, "grad_norm": 1.4626474380493164, "learning_rate": 1.9051661202411966e-05, "loss": 1.0069, "step": 808 }, { "epoch": 0.16588066434283372, "grad_norm": 1.5125443935394287, "learning_rate": 1.9048836036833368e-05, "loss": 1.0422, "step": 809 }, { "epoch": 0.16608570842731188, "grad_norm": 1.2780448198318481, "learning_rate": 1.904600687944196e-05, "loss": 0.9308, "step": 810 }, { "epoch": 0.16629075251179004, "grad_norm": 1.4409434795379639, "learning_rate": 1.904317373148581e-05, "loss": 1.0438, "step": 811 }, { "epoch": 0.1664957965962682, "grad_norm": 1.3818930387496948, "learning_rate": 1.9040336594214727e-05, "loss": 1.0009, "step": 812 }, { "epoch": 0.16670084068074636, "grad_norm": 1.4716781377792358, "learning_rate": 1.903749546888029e-05, "loss": 1.0127, "step": 813 }, { "epoch": 0.16690588476522453, "grad_norm": 1.4330209493637085, "learning_rate": 1.9034650356735843e-05, "loss": 1.0072, "step": 814 }, { "epoch": 0.1671109288497027, "grad_norm": 1.394307255744934, "learning_rate": 1.903180125903648e-05, "loss": 0.9944, "step": 815 }, { "epoch": 0.16731597293418085, "grad_norm": 1.4421154260635376, "learning_rate": 1.9028948177039045e-05, "loss": 1.0341, "step": 816 }, { "epoch": 0.167521017018659, "grad_norm": 1.3404803276062012, "learning_rate": 1.9026091112002163e-05, "loss": 1.0369, "step": 817 }, { "epoch": 0.16772606110313718, "grad_norm": 1.3005973100662231, "learning_rate": 1.9023230065186192e-05, "loss": 1.0044, "step": 818 }, { "epoch": 0.16793110518761534, "grad_norm": 1.3868035078048706, "learning_rate": 1.902036503785326e-05, "loss": 1.0473, "step": 819 }, { "epoch": 0.1681361492720935, "grad_norm": 1.5277482271194458, "learning_rate": 1.9017496031267253e-05, "loss": 1.0302, "step": 820 }, { "epoch": 0.16834119335657166, "grad_norm": 1.323912262916565, "learning_rate": 1.90146230466938e-05, "loss": 1.0425, "step": 821 }, { "epoch": 0.16854623744104982, "grad_norm": 1.4408150911331177, "learning_rate": 1.90117460854003e-05, "loss": 1.0297, "step": 822 }, { "epoch": 0.16875128152552799, "grad_norm": 1.3105264902114868, "learning_rate": 1.9008865148655893e-05, "loss": 0.9404, "step": 823 }, { "epoch": 0.16895632561000615, "grad_norm": 1.3322471380233765, "learning_rate": 1.900598023773148e-05, "loss": 1.0257, "step": 824 }, { "epoch": 0.1691613696944843, "grad_norm": 1.3714042901992798, "learning_rate": 1.9003091353899713e-05, "loss": 1.0439, "step": 825 }, { "epoch": 0.16936641377896247, "grad_norm": 1.4388227462768555, "learning_rate": 1.9000198498435002e-05, "loss": 0.9873, "step": 826 }, { "epoch": 0.16957145786344063, "grad_norm": 1.4087018966674805, "learning_rate": 1.8997301672613496e-05, "loss": 0.9313, "step": 827 }, { "epoch": 0.1697765019479188, "grad_norm": 1.4028054475784302, "learning_rate": 1.899440087771311e-05, "loss": 1.006, "step": 828 }, { "epoch": 0.16998154603239696, "grad_norm": 1.5050058364868164, "learning_rate": 1.89914961150135e-05, "loss": 0.9803, "step": 829 }, { "epoch": 0.17018659011687512, "grad_norm": 1.2328546047210693, "learning_rate": 1.8988587385796078e-05, "loss": 1.0397, "step": 830 }, { "epoch": 0.17039163420135328, "grad_norm": 1.3684916496276855, "learning_rate": 1.8985674691344e-05, "loss": 1.0033, "step": 831 }, { "epoch": 0.17059667828583144, "grad_norm": 1.5097377300262451, "learning_rate": 1.8982758032942184e-05, "loss": 1.0291, "step": 832 }, { "epoch": 0.1708017223703096, "grad_norm": 1.3797063827514648, "learning_rate": 1.897983741187728e-05, "loss": 0.9592, "step": 833 }, { "epoch": 0.17100676645478777, "grad_norm": 1.3697750568389893, "learning_rate": 1.89769128294377e-05, "loss": 0.9561, "step": 834 }, { "epoch": 0.17121181053926593, "grad_norm": 1.392430305480957, "learning_rate": 1.8973984286913584e-05, "loss": 0.9792, "step": 835 }, { "epoch": 0.1714168546237441, "grad_norm": 1.3672035932540894, "learning_rate": 1.897105178559685e-05, "loss": 1.0139, "step": 836 }, { "epoch": 0.17162189870822228, "grad_norm": 1.4986127614974976, "learning_rate": 1.896811532678113e-05, "loss": 0.9681, "step": 837 }, { "epoch": 0.17182694279270044, "grad_norm": 1.3286699056625366, "learning_rate": 1.8965174911761824e-05, "loss": 0.9832, "step": 838 }, { "epoch": 0.1720319868771786, "grad_norm": 1.3616853952407837, "learning_rate": 1.8962230541836068e-05, "loss": 1.04, "step": 839 }, { "epoch": 0.17223703096165677, "grad_norm": 1.3476420640945435, "learning_rate": 1.8959282218302746e-05, "loss": 1.0746, "step": 840 }, { "epoch": 0.17244207504613493, "grad_norm": 1.3947440385818481, "learning_rate": 1.895632994246248e-05, "loss": 1.0357, "step": 841 }, { "epoch": 0.1726471191306131, "grad_norm": 1.377203106880188, "learning_rate": 1.8953373715617646e-05, "loss": 1.0081, "step": 842 }, { "epoch": 0.17285216321509125, "grad_norm": 1.4085217714309692, "learning_rate": 1.8950413539072354e-05, "loss": 0.9687, "step": 843 }, { "epoch": 0.17305720729956942, "grad_norm": 1.4017854928970337, "learning_rate": 1.8947449414132458e-05, "loss": 1.0503, "step": 844 }, { "epoch": 0.17326225138404758, "grad_norm": 1.4089491367340088, "learning_rate": 1.8944481342105555e-05, "loss": 0.9326, "step": 845 }, { "epoch": 0.17346729546852574, "grad_norm": 1.4087719917297363, "learning_rate": 1.8941509324300983e-05, "loss": 1.0249, "step": 846 }, { "epoch": 0.1736723395530039, "grad_norm": 1.4049080610275269, "learning_rate": 1.893853336202983e-05, "loss": 1.0316, "step": 847 }, { "epoch": 0.17387738363748206, "grad_norm": 1.4525107145309448, "learning_rate": 1.89355534566049e-05, "loss": 1.0381, "step": 848 }, { "epoch": 0.17408242772196023, "grad_norm": 1.3661900758743286, "learning_rate": 1.8932569609340765e-05, "loss": 0.952, "step": 849 }, { "epoch": 0.1742874718064384, "grad_norm": 1.473065733909607, "learning_rate": 1.8929581821553712e-05, "loss": 0.9927, "step": 850 }, { "epoch": 0.17449251589091655, "grad_norm": 1.3018890619277954, "learning_rate": 1.8926590094561784e-05, "loss": 1.0389, "step": 851 }, { "epoch": 0.1746975599753947, "grad_norm": 1.4222490787506104, "learning_rate": 1.892359442968475e-05, "loss": 1.0366, "step": 852 }, { "epoch": 0.17490260405987287, "grad_norm": 1.3452872037887573, "learning_rate": 1.8920594828244123e-05, "loss": 0.9124, "step": 853 }, { "epoch": 0.17510764814435104, "grad_norm": 1.3145323991775513, "learning_rate": 1.8917591291563152e-05, "loss": 0.9825, "step": 854 }, { "epoch": 0.1753126922288292, "grad_norm": 1.3745651245117188, "learning_rate": 1.8914583820966816e-05, "loss": 0.9979, "step": 855 }, { "epoch": 0.17551773631330736, "grad_norm": 1.4021227359771729, "learning_rate": 1.8911572417781837e-05, "loss": 0.9762, "step": 856 }, { "epoch": 0.17572278039778552, "grad_norm": 1.2935750484466553, "learning_rate": 1.8908557083336668e-05, "loss": 1.034, "step": 857 }, { "epoch": 0.17592782448226368, "grad_norm": 1.3987634181976318, "learning_rate": 1.8905537818961494e-05, "loss": 0.9444, "step": 858 }, { "epoch": 0.17613286856674185, "grad_norm": 1.3980003595352173, "learning_rate": 1.890251462598824e-05, "loss": 0.9535, "step": 859 }, { "epoch": 0.17633791265122, "grad_norm": 1.3230862617492676, "learning_rate": 1.8899487505750554e-05, "loss": 0.9004, "step": 860 }, { "epoch": 0.17654295673569817, "grad_norm": 1.420479655265808, "learning_rate": 1.8896456459583835e-05, "loss": 1.0221, "step": 861 }, { "epoch": 0.17674800082017633, "grad_norm": 1.444658875465393, "learning_rate": 1.889342148882519e-05, "loss": 1.0664, "step": 862 }, { "epoch": 0.1769530449046545, "grad_norm": 1.3211175203323364, "learning_rate": 1.8890382594813473e-05, "loss": 0.9396, "step": 863 }, { "epoch": 0.17715808898913266, "grad_norm": 1.3933604955673218, "learning_rate": 1.888733977888927e-05, "loss": 0.9896, "step": 864 }, { "epoch": 0.17736313307361082, "grad_norm": 1.4485355615615845, "learning_rate": 1.8884293042394882e-05, "loss": 1.0708, "step": 865 }, { "epoch": 0.17756817715808898, "grad_norm": 1.3950096368789673, "learning_rate": 1.8881242386674362e-05, "loss": 0.9238, "step": 866 }, { "epoch": 0.17777322124256714, "grad_norm": 1.323636770248413, "learning_rate": 1.8878187813073465e-05, "loss": 0.9009, "step": 867 }, { "epoch": 0.1779782653270453, "grad_norm": 1.5187108516693115, "learning_rate": 1.8875129322939698e-05, "loss": 0.963, "step": 868 }, { "epoch": 0.17818330941152347, "grad_norm": 1.3617372512817383, "learning_rate": 1.8872066917622283e-05, "loss": 1.0472, "step": 869 }, { "epoch": 0.17838835349600163, "grad_norm": 1.3873493671417236, "learning_rate": 1.886900059847218e-05, "loss": 1.0319, "step": 870 }, { "epoch": 0.1785933975804798, "grad_norm": 1.3494668006896973, "learning_rate": 1.886593036684206e-05, "loss": 1.0131, "step": 871 }, { "epoch": 0.17879844166495795, "grad_norm": 1.3772006034851074, "learning_rate": 1.886285622408633e-05, "loss": 0.9779, "step": 872 }, { "epoch": 0.17900348574943614, "grad_norm": 1.2964247465133667, "learning_rate": 1.8859778171561118e-05, "loss": 0.9192, "step": 873 }, { "epoch": 0.1792085298339143, "grad_norm": 1.4049022197723389, "learning_rate": 1.8856696210624284e-05, "loss": 1.0327, "step": 874 }, { "epoch": 0.17941357391839247, "grad_norm": 1.3237428665161133, "learning_rate": 1.88536103426354e-05, "loss": 1.0223, "step": 875 }, { "epoch": 0.17961861800287063, "grad_norm": 1.4712395668029785, "learning_rate": 1.8850520568955785e-05, "loss": 1.0945, "step": 876 }, { "epoch": 0.1798236620873488, "grad_norm": 1.316125512123108, "learning_rate": 1.8847426890948447e-05, "loss": 1.0008, "step": 877 }, { "epoch": 0.18002870617182695, "grad_norm": 1.3149564266204834, "learning_rate": 1.8844329309978146e-05, "loss": 0.9534, "step": 878 }, { "epoch": 0.18023375025630511, "grad_norm": 1.3449842929840088, "learning_rate": 1.8841227827411343e-05, "loss": 1.0039, "step": 879 }, { "epoch": 0.18043879434078328, "grad_norm": 1.4726043939590454, "learning_rate": 1.883812244461624e-05, "loss": 1.0364, "step": 880 }, { "epoch": 0.18064383842526144, "grad_norm": 1.3881314992904663, "learning_rate": 1.8835013162962736e-05, "loss": 1.0889, "step": 881 }, { "epoch": 0.1808488825097396, "grad_norm": 1.3049477338790894, "learning_rate": 1.8831899983822475e-05, "loss": 0.9521, "step": 882 }, { "epoch": 0.18105392659421776, "grad_norm": 1.3304662704467773, "learning_rate": 1.88287829085688e-05, "loss": 0.9933, "step": 883 }, { "epoch": 0.18125897067869592, "grad_norm": 1.2811650037765503, "learning_rate": 1.8825661938576784e-05, "loss": 0.9968, "step": 884 }, { "epoch": 0.1814640147631741, "grad_norm": 1.4259216785430908, "learning_rate": 1.8822537075223213e-05, "loss": 1.0804, "step": 885 }, { "epoch": 0.18166905884765225, "grad_norm": 1.4515243768692017, "learning_rate": 1.881940831988659e-05, "loss": 0.9813, "step": 886 }, { "epoch": 0.1818741029321304, "grad_norm": 1.404020071029663, "learning_rate": 1.8816275673947148e-05, "loss": 1.0744, "step": 887 }, { "epoch": 0.18207914701660857, "grad_norm": 1.461674690246582, "learning_rate": 1.8813139138786814e-05, "loss": 0.9829, "step": 888 }, { "epoch": 0.18228419110108673, "grad_norm": 1.2721086740493774, "learning_rate": 1.8809998715789247e-05, "loss": 0.9514, "step": 889 }, { "epoch": 0.1824892351855649, "grad_norm": 1.4128843545913696, "learning_rate": 1.880685440633982e-05, "loss": 1.0363, "step": 890 }, { "epoch": 0.18269427927004306, "grad_norm": 1.467276692390442, "learning_rate": 1.880370621182561e-05, "loss": 1.009, "step": 891 }, { "epoch": 0.18289932335452122, "grad_norm": 1.3447619676589966, "learning_rate": 1.8800554133635417e-05, "loss": 0.9641, "step": 892 }, { "epoch": 0.18310436743899938, "grad_norm": 1.5206482410430908, "learning_rate": 1.8797398173159757e-05, "loss": 1.0124, "step": 893 }, { "epoch": 0.18330941152347754, "grad_norm": 1.4381628036499023, "learning_rate": 1.8794238331790847e-05, "loss": 0.9565, "step": 894 }, { "epoch": 0.1835144556079557, "grad_norm": 1.4004393815994263, "learning_rate": 1.8791074610922624e-05, "loss": 0.9679, "step": 895 }, { "epoch": 0.18371949969243387, "grad_norm": 1.3604897260665894, "learning_rate": 1.8787907011950736e-05, "loss": 1.0064, "step": 896 }, { "epoch": 0.18392454377691203, "grad_norm": 1.2776540517807007, "learning_rate": 1.8784735536272543e-05, "loss": 0.997, "step": 897 }, { "epoch": 0.1841295878613902, "grad_norm": 1.3187589645385742, "learning_rate": 1.8781560185287113e-05, "loss": 1.0395, "step": 898 }, { "epoch": 0.18433463194586835, "grad_norm": 1.3419272899627686, "learning_rate": 1.8778380960395217e-05, "loss": 0.9409, "step": 899 }, { "epoch": 0.18453967603034652, "grad_norm": 1.3630974292755127, "learning_rate": 1.877519786299935e-05, "loss": 0.9559, "step": 900 }, { "epoch": 0.18474472011482468, "grad_norm": 1.2162123918533325, "learning_rate": 1.87720108945037e-05, "loss": 0.9756, "step": 901 }, { "epoch": 0.18494976419930284, "grad_norm": 1.3916378021240234, "learning_rate": 1.8768820056314173e-05, "loss": 0.9917, "step": 902 }, { "epoch": 0.185154808283781, "grad_norm": 1.4868042469024658, "learning_rate": 1.876562534983838e-05, "loss": 1.0746, "step": 903 }, { "epoch": 0.18535985236825916, "grad_norm": 1.3044636249542236, "learning_rate": 1.8762426776485636e-05, "loss": 0.9192, "step": 904 }, { "epoch": 0.18556489645273733, "grad_norm": 1.432564377784729, "learning_rate": 1.875922433766696e-05, "loss": 0.9398, "step": 905 }, { "epoch": 0.1857699405372155, "grad_norm": 1.4092472791671753, "learning_rate": 1.8756018034795084e-05, "loss": 1.0364, "step": 906 }, { "epoch": 0.18597498462169365, "grad_norm": 1.4460992813110352, "learning_rate": 1.875280786928444e-05, "loss": 1.0162, "step": 907 }, { "epoch": 0.18618002870617184, "grad_norm": 1.4143847227096558, "learning_rate": 1.8749593842551155e-05, "loss": 0.9599, "step": 908 }, { "epoch": 0.18638507279065, "grad_norm": 1.3803716897964478, "learning_rate": 1.8746375956013076e-05, "loss": 0.956, "step": 909 }, { "epoch": 0.18659011687512816, "grad_norm": 1.2948009967803955, "learning_rate": 1.8743154211089743e-05, "loss": 0.9934, "step": 910 }, { "epoch": 0.18679516095960633, "grad_norm": 1.3511126041412354, "learning_rate": 1.87399286092024e-05, "loss": 0.9187, "step": 911 }, { "epoch": 0.1870002050440845, "grad_norm": 1.4360300302505493, "learning_rate": 1.873669915177399e-05, "loss": 1.0359, "step": 912 }, { "epoch": 0.18720524912856265, "grad_norm": 1.3906595706939697, "learning_rate": 1.8733465840229162e-05, "loss": 1.0087, "step": 913 }, { "epoch": 0.1874102932130408, "grad_norm": 1.381023645401001, "learning_rate": 1.873022867599426e-05, "loss": 1.0063, "step": 914 }, { "epoch": 0.18761533729751897, "grad_norm": 1.4220986366271973, "learning_rate": 1.8726987660497332e-05, "loss": 0.951, "step": 915 }, { "epoch": 0.18782038138199714, "grad_norm": 1.3204081058502197, "learning_rate": 1.8723742795168124e-05, "loss": 1.0635, "step": 916 }, { "epoch": 0.1880254254664753, "grad_norm": 1.3240879774093628, "learning_rate": 1.872049408143808e-05, "loss": 1.0334, "step": 917 }, { "epoch": 0.18823046955095346, "grad_norm": 1.3502057790756226, "learning_rate": 1.871724152074033e-05, "loss": 1.0113, "step": 918 }, { "epoch": 0.18843551363543162, "grad_norm": 1.5234510898590088, "learning_rate": 1.871398511450973e-05, "loss": 1.0686, "step": 919 }, { "epoch": 0.18864055771990978, "grad_norm": 1.4439380168914795, "learning_rate": 1.87107248641828e-05, "loss": 0.9844, "step": 920 }, { "epoch": 0.18884560180438795, "grad_norm": 1.430930256843567, "learning_rate": 1.8707460771197773e-05, "loss": 1.0686, "step": 921 }, { "epoch": 0.1890506458888661, "grad_norm": 1.3851648569107056, "learning_rate": 1.8704192836994578e-05, "loss": 1.0367, "step": 922 }, { "epoch": 0.18925568997334427, "grad_norm": 1.3852565288543701, "learning_rate": 1.8700921063014833e-05, "loss": 1.0282, "step": 923 }, { "epoch": 0.18946073405782243, "grad_norm": 1.3627431392669678, "learning_rate": 1.8697645450701852e-05, "loss": 1.0506, "step": 924 }, { "epoch": 0.1896657781423006, "grad_norm": 1.386335849761963, "learning_rate": 1.8694366001500643e-05, "loss": 0.9864, "step": 925 }, { "epoch": 0.18987082222677876, "grad_norm": 1.4799917936325073, "learning_rate": 1.8691082716857904e-05, "loss": 1.0671, "step": 926 }, { "epoch": 0.19007586631125692, "grad_norm": 1.3996399641036987, "learning_rate": 1.8687795598222024e-05, "loss": 0.9263, "step": 927 }, { "epoch": 0.19028091039573508, "grad_norm": 1.409293293952942, "learning_rate": 1.8684504647043093e-05, "loss": 1.0837, "step": 928 }, { "epoch": 0.19048595448021324, "grad_norm": 1.241890788078308, "learning_rate": 1.8681209864772878e-05, "loss": 0.9832, "step": 929 }, { "epoch": 0.1906909985646914, "grad_norm": 1.2955244779586792, "learning_rate": 1.8677911252864844e-05, "loss": 1.0093, "step": 930 }, { "epoch": 0.19089604264916957, "grad_norm": 1.3168213367462158, "learning_rate": 1.8674608812774145e-05, "loss": 0.9969, "step": 931 }, { "epoch": 0.19110108673364773, "grad_norm": 1.4267618656158447, "learning_rate": 1.8671302545957628e-05, "loss": 0.9962, "step": 932 }, { "epoch": 0.1913061308181259, "grad_norm": 1.5310817956924438, "learning_rate": 1.8667992453873812e-05, "loss": 0.9678, "step": 933 }, { "epoch": 0.19151117490260405, "grad_norm": 1.3513398170471191, "learning_rate": 1.8664678537982925e-05, "loss": 0.9659, "step": 934 }, { "epoch": 0.19171621898708222, "grad_norm": 1.3672804832458496, "learning_rate": 1.8661360799746863e-05, "loss": 0.9523, "step": 935 }, { "epoch": 0.19192126307156038, "grad_norm": 1.3420240879058838, "learning_rate": 1.865803924062922e-05, "loss": 1.0333, "step": 936 }, { "epoch": 0.19212630715603854, "grad_norm": 1.1793869733810425, "learning_rate": 1.8654713862095272e-05, "loss": 1.0046, "step": 937 }, { "epoch": 0.1923313512405167, "grad_norm": 1.3655810356140137, "learning_rate": 1.865138466561198e-05, "loss": 0.9928, "step": 938 }, { "epoch": 0.19253639532499486, "grad_norm": 1.4774723052978516, "learning_rate": 1.864805165264799e-05, "loss": 1.0049, "step": 939 }, { "epoch": 0.19274143940947303, "grad_norm": 1.381019949913025, "learning_rate": 1.864471482467363e-05, "loss": 0.9533, "step": 940 }, { "epoch": 0.1929464834939512, "grad_norm": 1.2993464469909668, "learning_rate": 1.8641374183160907e-05, "loss": 0.99, "step": 941 }, { "epoch": 0.19315152757842935, "grad_norm": 1.4591493606567383, "learning_rate": 1.8638029729583524e-05, "loss": 1.0252, "step": 942 }, { "epoch": 0.1933565716629075, "grad_norm": 1.208788514137268, "learning_rate": 1.863468146541685e-05, "loss": 0.9863, "step": 943 }, { "epoch": 0.1935616157473857, "grad_norm": 1.407615065574646, "learning_rate": 1.8631329392137947e-05, "loss": 1.0345, "step": 944 }, { "epoch": 0.19376665983186386, "grad_norm": 1.4102935791015625, "learning_rate": 1.8627973511225544e-05, "loss": 1.0048, "step": 945 }, { "epoch": 0.19397170391634203, "grad_norm": 1.379162073135376, "learning_rate": 1.8624613824160066e-05, "loss": 1.0639, "step": 946 }, { "epoch": 0.1941767480008202, "grad_norm": 1.2642709016799927, "learning_rate": 1.8621250332423603e-05, "loss": 0.9996, "step": 947 }, { "epoch": 0.19438179208529835, "grad_norm": 1.2854350805282593, "learning_rate": 1.8617883037499934e-05, "loss": 0.9905, "step": 948 }, { "epoch": 0.1945868361697765, "grad_norm": 1.3269835710525513, "learning_rate": 1.861451194087451e-05, "loss": 1.0139, "step": 949 }, { "epoch": 0.19479188025425467, "grad_norm": 1.344870924949646, "learning_rate": 1.8611137044034454e-05, "loss": 1.0309, "step": 950 }, { "epoch": 0.19499692433873284, "grad_norm": 1.4142215251922607, "learning_rate": 1.860775834846858e-05, "loss": 1.03, "step": 951 }, { "epoch": 0.195201968423211, "grad_norm": 1.3456612825393677, "learning_rate": 1.860437585566736e-05, "loss": 0.9882, "step": 952 }, { "epoch": 0.19540701250768916, "grad_norm": 1.4418225288391113, "learning_rate": 1.8600989567122958e-05, "loss": 1.0088, "step": 953 }, { "epoch": 0.19561205659216732, "grad_norm": 1.4756274223327637, "learning_rate": 1.8597599484329204e-05, "loss": 1.04, "step": 954 }, { "epoch": 0.19581710067664548, "grad_norm": 1.35809326171875, "learning_rate": 1.8594205608781596e-05, "loss": 1.0592, "step": 955 }, { "epoch": 0.19602214476112365, "grad_norm": 1.4577761888504028, "learning_rate": 1.8590807941977322e-05, "loss": 0.9348, "step": 956 }, { "epoch": 0.1962271888456018, "grad_norm": 1.3015317916870117, "learning_rate": 1.8587406485415226e-05, "loss": 1.0005, "step": 957 }, { "epoch": 0.19643223293007997, "grad_norm": 1.3533462285995483, "learning_rate": 1.8584001240595832e-05, "loss": 1.0089, "step": 958 }, { "epoch": 0.19663727701455813, "grad_norm": 1.4178098440170288, "learning_rate": 1.8580592209021332e-05, "loss": 1.0659, "step": 959 }, { "epoch": 0.1968423210990363, "grad_norm": 1.3018275499343872, "learning_rate": 1.8577179392195585e-05, "loss": 0.9221, "step": 960 }, { "epoch": 0.19704736518351446, "grad_norm": 1.273917317390442, "learning_rate": 1.8573762791624132e-05, "loss": 0.9318, "step": 961 }, { "epoch": 0.19725240926799262, "grad_norm": 1.2548998594284058, "learning_rate": 1.8570342408814173e-05, "loss": 0.9176, "step": 962 }, { "epoch": 0.19745745335247078, "grad_norm": 1.385099172592163, "learning_rate": 1.856691824527458e-05, "loss": 1.0311, "step": 963 }, { "epoch": 0.19766249743694894, "grad_norm": 1.5037800073623657, "learning_rate": 1.856349030251589e-05, "loss": 1.0043, "step": 964 }, { "epoch": 0.1978675415214271, "grad_norm": 1.3947696685791016, "learning_rate": 1.856005858205031e-05, "loss": 1.0404, "step": 965 }, { "epoch": 0.19807258560590527, "grad_norm": 1.3488932847976685, "learning_rate": 1.855662308539172e-05, "loss": 0.9948, "step": 966 }, { "epoch": 0.19827762969038343, "grad_norm": 1.3911641836166382, "learning_rate": 1.855318381405564e-05, "loss": 0.984, "step": 967 }, { "epoch": 0.1984826737748616, "grad_norm": 1.284132480621338, "learning_rate": 1.854974076955929e-05, "loss": 0.936, "step": 968 }, { "epoch": 0.19868771785933975, "grad_norm": 1.3506022691726685, "learning_rate": 1.854629395342154e-05, "loss": 0.9812, "step": 969 }, { "epoch": 0.19889276194381791, "grad_norm": 1.2645511627197266, "learning_rate": 1.854284336716291e-05, "loss": 0.9162, "step": 970 }, { "epoch": 0.19909780602829608, "grad_norm": 1.3161996603012085, "learning_rate": 1.8539389012305597e-05, "loss": 0.9896, "step": 971 }, { "epoch": 0.19930285011277424, "grad_norm": 1.3175296783447266, "learning_rate": 1.8535930890373467e-05, "loss": 0.9875, "step": 972 }, { "epoch": 0.1995078941972524, "grad_norm": 1.4366837739944458, "learning_rate": 1.8532469002892032e-05, "loss": 1.0139, "step": 973 }, { "epoch": 0.19971293828173056, "grad_norm": 1.3574107885360718, "learning_rate": 1.8529003351388477e-05, "loss": 1.0307, "step": 974 }, { "epoch": 0.19991798236620872, "grad_norm": 1.4028923511505127, "learning_rate": 1.852553393739164e-05, "loss": 0.9737, "step": 975 }, { "epoch": 0.20012302645068689, "grad_norm": 1.4170269966125488, "learning_rate": 1.8522060762432022e-05, "loss": 0.9599, "step": 976 }, { "epoch": 0.20032807053516505, "grad_norm": 1.4679478406906128, "learning_rate": 1.8518583828041787e-05, "loss": 0.9979, "step": 977 }, { "epoch": 0.2005331146196432, "grad_norm": 1.447860598564148, "learning_rate": 1.851510313575475e-05, "loss": 0.9633, "step": 978 }, { "epoch": 0.2007381587041214, "grad_norm": 1.2625784873962402, "learning_rate": 1.851161868710639e-05, "loss": 0.9782, "step": 979 }, { "epoch": 0.20094320278859956, "grad_norm": 1.398541808128357, "learning_rate": 1.8508130483633833e-05, "loss": 0.9957, "step": 980 }, { "epoch": 0.20114824687307772, "grad_norm": 1.3864545822143555, "learning_rate": 1.850463852687588e-05, "loss": 1.0089, "step": 981 }, { "epoch": 0.20135329095755589, "grad_norm": 1.4482436180114746, "learning_rate": 1.8501142818372964e-05, "loss": 1.0493, "step": 982 }, { "epoch": 0.20155833504203405, "grad_norm": 1.358792781829834, "learning_rate": 1.8497643359667193e-05, "loss": 0.9913, "step": 983 }, { "epoch": 0.2017633791265122, "grad_norm": 1.4761446714401245, "learning_rate": 1.849414015230232e-05, "loss": 0.9983, "step": 984 }, { "epoch": 0.20196842321099037, "grad_norm": 1.4347089529037476, "learning_rate": 1.8490633197823754e-05, "loss": 0.9916, "step": 985 }, { "epoch": 0.20217346729546853, "grad_norm": 1.358152985572815, "learning_rate": 1.8487122497778557e-05, "loss": 0.9738, "step": 986 }, { "epoch": 0.2023785113799467, "grad_norm": 1.3689377307891846, "learning_rate": 1.848360805371544e-05, "loss": 0.9771, "step": 987 }, { "epoch": 0.20258355546442486, "grad_norm": 1.3519949913024902, "learning_rate": 1.8480089867184775e-05, "loss": 0.9566, "step": 988 }, { "epoch": 0.20278859954890302, "grad_norm": 1.3522851467132568, "learning_rate": 1.8476567939738567e-05, "loss": 0.9257, "step": 989 }, { "epoch": 0.20299364363338118, "grad_norm": 1.491528868675232, "learning_rate": 1.8473042272930495e-05, "loss": 1.0252, "step": 990 }, { "epoch": 0.20319868771785934, "grad_norm": 1.313726782798767, "learning_rate": 1.846951286831587e-05, "loss": 0.8623, "step": 991 }, { "epoch": 0.2034037318023375, "grad_norm": 1.4168007373809814, "learning_rate": 1.8465979727451653e-05, "loss": 0.9472, "step": 992 }, { "epoch": 0.20360877588681567, "grad_norm": 1.5190733671188354, "learning_rate": 1.8462442851896464e-05, "loss": 1.055, "step": 993 }, { "epoch": 0.20381381997129383, "grad_norm": 1.3579100370407104, "learning_rate": 1.8458902243210558e-05, "loss": 0.9849, "step": 994 }, { "epoch": 0.204018864055772, "grad_norm": 1.4717313051223755, "learning_rate": 1.845535790295585e-05, "loss": 1.0202, "step": 995 }, { "epoch": 0.20422390814025015, "grad_norm": 1.434638261795044, "learning_rate": 1.845180983269589e-05, "loss": 1.0388, "step": 996 }, { "epoch": 0.20442895222472832, "grad_norm": 1.2897224426269531, "learning_rate": 1.8448258033995877e-05, "loss": 0.9468, "step": 997 }, { "epoch": 0.20463399630920648, "grad_norm": 1.3011037111282349, "learning_rate": 1.8444702508422655e-05, "loss": 0.9572, "step": 998 }, { "epoch": 0.20483904039368464, "grad_norm": 1.310855507850647, "learning_rate": 1.8441143257544707e-05, "loss": 0.9969, "step": 999 }, { "epoch": 0.2050440844781628, "grad_norm": 1.3547006845474243, "learning_rate": 1.843758028293218e-05, "loss": 0.9727, "step": 1000 }, { "epoch": 0.20524912856264096, "grad_norm": 1.2912858724594116, "learning_rate": 1.843401358615683e-05, "loss": 0.9699, "step": 1001 }, { "epoch": 0.20545417264711913, "grad_norm": 1.3390874862670898, "learning_rate": 1.8430443168792087e-05, "loss": 0.9679, "step": 1002 }, { "epoch": 0.2056592167315973, "grad_norm": 1.2546412944793701, "learning_rate": 1.8426869032413003e-05, "loss": 0.8824, "step": 1003 }, { "epoch": 0.20586426081607545, "grad_norm": 1.3612091541290283, "learning_rate": 1.8423291178596276e-05, "loss": 1.0535, "step": 1004 }, { "epoch": 0.2060693049005536, "grad_norm": 1.3689464330673218, "learning_rate": 1.8419709608920243e-05, "loss": 0.9667, "step": 1005 }, { "epoch": 0.20627434898503177, "grad_norm": 1.3253076076507568, "learning_rate": 1.8416124324964885e-05, "loss": 0.97, "step": 1006 }, { "epoch": 0.20647939306950994, "grad_norm": 1.2571911811828613, "learning_rate": 1.8412535328311813e-05, "loss": 0.9521, "step": 1007 }, { "epoch": 0.2066844371539881, "grad_norm": 1.4516096115112305, "learning_rate": 1.8408942620544286e-05, "loss": 1.0232, "step": 1008 }, { "epoch": 0.20688948123846626, "grad_norm": 1.3601112365722656, "learning_rate": 1.840534620324719e-05, "loss": 1.0155, "step": 1009 }, { "epoch": 0.20709452532294442, "grad_norm": 1.3604000806808472, "learning_rate": 1.840174607800706e-05, "loss": 1.0524, "step": 1010 }, { "epoch": 0.20729956940742258, "grad_norm": 1.4133331775665283, "learning_rate": 1.8398142246412046e-05, "loss": 1.0164, "step": 1011 }, { "epoch": 0.20750461349190075, "grad_norm": 1.335758090019226, "learning_rate": 1.8394534710051956e-05, "loss": 0.9739, "step": 1012 }, { "epoch": 0.2077096575763789, "grad_norm": 1.3799625635147095, "learning_rate": 1.8390923470518217e-05, "loss": 1.1057, "step": 1013 }, { "epoch": 0.20791470166085707, "grad_norm": 1.4419772624969482, "learning_rate": 1.83873085294039e-05, "loss": 1.0265, "step": 1014 }, { "epoch": 0.20811974574533526, "grad_norm": 1.542079210281372, "learning_rate": 1.83836898883037e-05, "loss": 1.0301, "step": 1015 }, { "epoch": 0.20832478982981342, "grad_norm": 1.336113452911377, "learning_rate": 1.8380067548813955e-05, "loss": 1.0033, "step": 1016 }, { "epoch": 0.20852983391429158, "grad_norm": 1.2845306396484375, "learning_rate": 1.8376441512532617e-05, "loss": 1.0085, "step": 1017 }, { "epoch": 0.20873487799876975, "grad_norm": 1.3838720321655273, "learning_rate": 1.8372811781059284e-05, "loss": 0.9881, "step": 1018 }, { "epoch": 0.2089399220832479, "grad_norm": 1.520869255065918, "learning_rate": 1.8369178355995185e-05, "loss": 1.0543, "step": 1019 }, { "epoch": 0.20914496616772607, "grad_norm": 1.3497282266616821, "learning_rate": 1.836554123894316e-05, "loss": 0.9939, "step": 1020 }, { "epoch": 0.20935001025220423, "grad_norm": 1.2935587167739868, "learning_rate": 1.8361900431507702e-05, "loss": 0.8533, "step": 1021 }, { "epoch": 0.2095550543366824, "grad_norm": 1.227770209312439, "learning_rate": 1.835825593529492e-05, "loss": 0.9891, "step": 1022 }, { "epoch": 0.20976009842116056, "grad_norm": 1.3074196577072144, "learning_rate": 1.8354607751912538e-05, "loss": 1.0021, "step": 1023 }, { "epoch": 0.20996514250563872, "grad_norm": 1.290696144104004, "learning_rate": 1.8350955882969937e-05, "loss": 0.9155, "step": 1024 }, { "epoch": 0.21017018659011688, "grad_norm": 1.357527256011963, "learning_rate": 1.8347300330078094e-05, "loss": 0.9558, "step": 1025 }, { "epoch": 0.21037523067459504, "grad_norm": 1.4261053800582886, "learning_rate": 1.834364109484963e-05, "loss": 1.0217, "step": 1026 }, { "epoch": 0.2105802747590732, "grad_norm": 1.2510592937469482, "learning_rate": 1.833997817889878e-05, "loss": 0.936, "step": 1027 }, { "epoch": 0.21078531884355137, "grad_norm": 1.3659616708755493, "learning_rate": 1.833631158384141e-05, "loss": 1.0514, "step": 1028 }, { "epoch": 0.21099036292802953, "grad_norm": 1.344482183456421, "learning_rate": 1.8332641311295e-05, "loss": 0.9731, "step": 1029 }, { "epoch": 0.2111954070125077, "grad_norm": 1.3241699934005737, "learning_rate": 1.8328967362878662e-05, "loss": 1.0113, "step": 1030 }, { "epoch": 0.21140045109698585, "grad_norm": 1.4345929622650146, "learning_rate": 1.8325289740213126e-05, "loss": 1.039, "step": 1031 }, { "epoch": 0.21160549518146402, "grad_norm": 1.5030070543289185, "learning_rate": 1.8321608444920738e-05, "loss": 0.9815, "step": 1032 }, { "epoch": 0.21181053926594218, "grad_norm": 1.3289673328399658, "learning_rate": 1.831792347862547e-05, "loss": 0.9767, "step": 1033 }, { "epoch": 0.21201558335042034, "grad_norm": 1.3361834287643433, "learning_rate": 1.8314234842952916e-05, "loss": 0.9876, "step": 1034 }, { "epoch": 0.2122206274348985, "grad_norm": 1.4639049768447876, "learning_rate": 1.8310542539530282e-05, "loss": 1.0129, "step": 1035 }, { "epoch": 0.21242567151937666, "grad_norm": 1.3536595106124878, "learning_rate": 1.8306846569986388e-05, "loss": 1.0218, "step": 1036 }, { "epoch": 0.21263071560385483, "grad_norm": 1.329634189605713, "learning_rate": 1.830314693595169e-05, "loss": 0.8974, "step": 1037 }, { "epoch": 0.212835759688333, "grad_norm": 1.2708016633987427, "learning_rate": 1.8299443639058238e-05, "loss": 0.9698, "step": 1038 }, { "epoch": 0.21304080377281115, "grad_norm": 1.2240557670593262, "learning_rate": 1.8295736680939714e-05, "loss": 0.9806, "step": 1039 }, { "epoch": 0.2132458478572893, "grad_norm": 1.268416166305542, "learning_rate": 1.8292026063231407e-05, "loss": 1.0388, "step": 1040 }, { "epoch": 0.21345089194176747, "grad_norm": 1.307891607284546, "learning_rate": 1.8288311787570224e-05, "loss": 0.8899, "step": 1041 }, { "epoch": 0.21365593602624564, "grad_norm": 1.2887595891952515, "learning_rate": 1.828459385559468e-05, "loss": 1.0414, "step": 1042 }, { "epoch": 0.2138609801107238, "grad_norm": 1.2782624959945679, "learning_rate": 1.8280872268944914e-05, "loss": 0.9938, "step": 1043 }, { "epoch": 0.21406602419520196, "grad_norm": 1.3551151752471924, "learning_rate": 1.8277147029262664e-05, "loss": 1.0339, "step": 1044 }, { "epoch": 0.21427106827968012, "grad_norm": 1.369005560874939, "learning_rate": 1.8273418138191294e-05, "loss": 1.0327, "step": 1045 }, { "epoch": 0.21447611236415828, "grad_norm": 1.3159486055374146, "learning_rate": 1.8269685597375764e-05, "loss": 0.9565, "step": 1046 }, { "epoch": 0.21468115644863645, "grad_norm": 1.3293524980545044, "learning_rate": 1.8265949408462657e-05, "loss": 1.0598, "step": 1047 }, { "epoch": 0.2148862005331146, "grad_norm": 1.3854233026504517, "learning_rate": 1.8262209573100148e-05, "loss": 0.9833, "step": 1048 }, { "epoch": 0.21509124461759277, "grad_norm": 1.5556117296218872, "learning_rate": 1.8258466092938042e-05, "loss": 0.9388, "step": 1049 }, { "epoch": 0.21529628870207093, "grad_norm": 1.3387846946716309, "learning_rate": 1.825471896962774e-05, "loss": 1.0585, "step": 1050 }, { "epoch": 0.21550133278654912, "grad_norm": 1.241543173789978, "learning_rate": 1.825096820482225e-05, "loss": 0.9527, "step": 1051 }, { "epoch": 0.21570637687102728, "grad_norm": 1.3790283203125, "learning_rate": 1.8247213800176192e-05, "loss": 0.9104, "step": 1052 }, { "epoch": 0.21591142095550545, "grad_norm": 1.4283795356750488, "learning_rate": 1.824345575734578e-05, "loss": 0.9934, "step": 1053 }, { "epoch": 0.2161164650399836, "grad_norm": 1.446750283241272, "learning_rate": 1.823969407798885e-05, "loss": 0.9645, "step": 1054 }, { "epoch": 0.21632150912446177, "grad_norm": 1.4638192653656006, "learning_rate": 1.8235928763764824e-05, "loss": 1.0584, "step": 1055 }, { "epoch": 0.21652655320893993, "grad_norm": 1.3499733209609985, "learning_rate": 1.8232159816334744e-05, "loss": 1.0656, "step": 1056 }, { "epoch": 0.2167315972934181, "grad_norm": 1.368425965309143, "learning_rate": 1.8228387237361245e-05, "loss": 1.0107, "step": 1057 }, { "epoch": 0.21693664137789626, "grad_norm": 1.3512500524520874, "learning_rate": 1.8224611028508566e-05, "loss": 1.0528, "step": 1058 }, { "epoch": 0.21714168546237442, "grad_norm": 1.3639627695083618, "learning_rate": 1.8220831191442546e-05, "loss": 0.9958, "step": 1059 }, { "epoch": 0.21734672954685258, "grad_norm": 1.3948469161987305, "learning_rate": 1.821704772783063e-05, "loss": 1.0189, "step": 1060 }, { "epoch": 0.21755177363133074, "grad_norm": 1.3233929872512817, "learning_rate": 1.8213260639341856e-05, "loss": 0.8706, "step": 1061 }, { "epoch": 0.2177568177158089, "grad_norm": 1.3520877361297607, "learning_rate": 1.8209469927646863e-05, "loss": 0.9678, "step": 1062 }, { "epoch": 0.21796186180028707, "grad_norm": 1.2574959993362427, "learning_rate": 1.8205675594417892e-05, "loss": 0.9879, "step": 1063 }, { "epoch": 0.21816690588476523, "grad_norm": 1.420987844467163, "learning_rate": 1.820187764132878e-05, "loss": 0.9891, "step": 1064 }, { "epoch": 0.2183719499692434, "grad_norm": 1.2774091958999634, "learning_rate": 1.8198076070054956e-05, "loss": 0.9704, "step": 1065 }, { "epoch": 0.21857699405372155, "grad_norm": 1.373063325881958, "learning_rate": 1.8194270882273456e-05, "loss": 1.0232, "step": 1066 }, { "epoch": 0.2187820381381997, "grad_norm": 1.371737003326416, "learning_rate": 1.8190462079662897e-05, "loss": 1.0448, "step": 1067 }, { "epoch": 0.21898708222267788, "grad_norm": 1.340754747390747, "learning_rate": 1.8186649663903503e-05, "loss": 0.9655, "step": 1068 }, { "epoch": 0.21919212630715604, "grad_norm": 1.4337084293365479, "learning_rate": 1.818283363667708e-05, "loss": 0.9886, "step": 1069 }, { "epoch": 0.2193971703916342, "grad_norm": 1.4565569162368774, "learning_rate": 1.8179013999667043e-05, "loss": 0.9726, "step": 1070 }, { "epoch": 0.21960221447611236, "grad_norm": 1.349209189414978, "learning_rate": 1.8175190754558384e-05, "loss": 0.9639, "step": 1071 }, { "epoch": 0.21980725856059052, "grad_norm": 1.4061583280563354, "learning_rate": 1.81713639030377e-05, "loss": 1.0598, "step": 1072 }, { "epoch": 0.22001230264506869, "grad_norm": 1.3375135660171509, "learning_rate": 1.8167533446793165e-05, "loss": 0.9465, "step": 1073 }, { "epoch": 0.22021734672954685, "grad_norm": 1.306372880935669, "learning_rate": 1.8163699387514552e-05, "loss": 0.9365, "step": 1074 }, { "epoch": 0.220422390814025, "grad_norm": 1.2604883909225464, "learning_rate": 1.8159861726893225e-05, "loss": 1.0754, "step": 1075 }, { "epoch": 0.22062743489850317, "grad_norm": 1.450443148612976, "learning_rate": 1.8156020466622134e-05, "loss": 0.9321, "step": 1076 }, { "epoch": 0.22083247898298133, "grad_norm": 1.3410927057266235, "learning_rate": 1.8152175608395814e-05, "loss": 0.9995, "step": 1077 }, { "epoch": 0.2210375230674595, "grad_norm": 1.3741199970245361, "learning_rate": 1.814832715391039e-05, "loss": 0.9767, "step": 1078 }, { "epoch": 0.22124256715193766, "grad_norm": 1.303576111793518, "learning_rate": 1.8144475104863574e-05, "loss": 0.9049, "step": 1079 }, { "epoch": 0.22144761123641582, "grad_norm": 1.2667291164398193, "learning_rate": 1.814061946295466e-05, "loss": 0.9398, "step": 1080 }, { "epoch": 0.22165265532089398, "grad_norm": 1.350213885307312, "learning_rate": 1.813676022988453e-05, "loss": 0.9693, "step": 1081 }, { "epoch": 0.22185769940537214, "grad_norm": 1.344823956489563, "learning_rate": 1.8132897407355657e-05, "loss": 1.0137, "step": 1082 }, { "epoch": 0.2220627434898503, "grad_norm": 1.4863694906234741, "learning_rate": 1.8129030997072082e-05, "loss": 0.9225, "step": 1083 }, { "epoch": 0.22226778757432847, "grad_norm": 1.3153992891311646, "learning_rate": 1.812516100073944e-05, "loss": 1.0096, "step": 1084 }, { "epoch": 0.22247283165880663, "grad_norm": 1.386460542678833, "learning_rate": 1.8121287420064946e-05, "loss": 1.0489, "step": 1085 }, { "epoch": 0.22267787574328482, "grad_norm": 1.2641936540603638, "learning_rate": 1.81174102567574e-05, "loss": 1.0069, "step": 1086 }, { "epoch": 0.22288291982776298, "grad_norm": 1.303567886352539, "learning_rate": 1.811352951252717e-05, "loss": 0.9647, "step": 1087 }, { "epoch": 0.22308796391224114, "grad_norm": 1.3169270753860474, "learning_rate": 1.8109645189086214e-05, "loss": 1.0151, "step": 1088 }, { "epoch": 0.2232930079967193, "grad_norm": 1.4448750019073486, "learning_rate": 1.8105757288148063e-05, "loss": 0.9768, "step": 1089 }, { "epoch": 0.22349805208119747, "grad_norm": 1.3864414691925049, "learning_rate": 1.810186581142784e-05, "loss": 0.9907, "step": 1090 }, { "epoch": 0.22370309616567563, "grad_norm": 1.2951580286026, "learning_rate": 1.8097970760642227e-05, "loss": 0.9339, "step": 1091 }, { "epoch": 0.2239081402501538, "grad_norm": 1.2423142194747925, "learning_rate": 1.809407213750949e-05, "loss": 1.0494, "step": 1092 }, { "epoch": 0.22411318433463195, "grad_norm": 1.3045562505722046, "learning_rate": 1.8090169943749477e-05, "loss": 1.0289, "step": 1093 }, { "epoch": 0.22431822841911012, "grad_norm": 1.2729600667953491, "learning_rate": 1.80862641810836e-05, "loss": 0.9813, "step": 1094 }, { "epoch": 0.22452327250358828, "grad_norm": 1.339719295501709, "learning_rate": 1.8082354851234853e-05, "loss": 0.8888, "step": 1095 }, { "epoch": 0.22472831658806644, "grad_norm": 1.29230535030365, "learning_rate": 1.8078441955927806e-05, "loss": 0.9452, "step": 1096 }, { "epoch": 0.2249333606725446, "grad_norm": 1.3110543489456177, "learning_rate": 1.807452549688859e-05, "loss": 1.035, "step": 1097 }, { "epoch": 0.22513840475702276, "grad_norm": 1.2422465085983276, "learning_rate": 1.8070605475844922e-05, "loss": 0.8656, "step": 1098 }, { "epoch": 0.22534344884150093, "grad_norm": 1.3843640089035034, "learning_rate": 1.806668189452608e-05, "loss": 1.0277, "step": 1099 }, { "epoch": 0.2255484929259791, "grad_norm": 1.2630095481872559, "learning_rate": 1.8062754754662924e-05, "loss": 0.9325, "step": 1100 }, { "epoch": 0.22575353701045725, "grad_norm": 1.2868765592575073, "learning_rate": 1.805882405798787e-05, "loss": 0.9666, "step": 1101 }, { "epoch": 0.2259585810949354, "grad_norm": 1.2729225158691406, "learning_rate": 1.8054889806234906e-05, "loss": 0.938, "step": 1102 }, { "epoch": 0.22616362517941357, "grad_norm": 1.3138407468795776, "learning_rate": 1.8050952001139597e-05, "loss": 1.0605, "step": 1103 }, { "epoch": 0.22636866926389174, "grad_norm": 1.6229217052459717, "learning_rate": 1.8047010644439074e-05, "loss": 1.006, "step": 1104 }, { "epoch": 0.2265737133483699, "grad_norm": 1.383608341217041, "learning_rate": 1.8043065737872024e-05, "loss": 0.9322, "step": 1105 }, { "epoch": 0.22677875743284806, "grad_norm": 1.3505085706710815, "learning_rate": 1.8039117283178715e-05, "loss": 0.8954, "step": 1106 }, { "epoch": 0.22698380151732622, "grad_norm": 1.3370184898376465, "learning_rate": 1.8035165282100963e-05, "loss": 0.9464, "step": 1107 }, { "epoch": 0.22718884560180438, "grad_norm": 1.3202847242355347, "learning_rate": 1.8031209736382163e-05, "loss": 0.9747, "step": 1108 }, { "epoch": 0.22739388968628255, "grad_norm": 1.2897361516952515, "learning_rate": 1.8027250647767267e-05, "loss": 0.9847, "step": 1109 }, { "epoch": 0.2275989337707607, "grad_norm": 1.3432430028915405, "learning_rate": 1.8023288018002792e-05, "loss": 0.9299, "step": 1110 }, { "epoch": 0.22780397785523887, "grad_norm": 1.4454143047332764, "learning_rate": 1.8019321848836822e-05, "loss": 1.0001, "step": 1111 }, { "epoch": 0.22800902193971703, "grad_norm": 1.4268063306808472, "learning_rate": 1.8015352142018984e-05, "loss": 1.0154, "step": 1112 }, { "epoch": 0.2282140660241952, "grad_norm": 1.2693495750427246, "learning_rate": 1.8011378899300492e-05, "loss": 0.9334, "step": 1113 }, { "epoch": 0.22841911010867336, "grad_norm": 1.3574693202972412, "learning_rate": 1.8007402122434098e-05, "loss": 0.9476, "step": 1114 }, { "epoch": 0.22862415419315152, "grad_norm": 1.3416982889175415, "learning_rate": 1.800342181317413e-05, "loss": 0.9004, "step": 1115 }, { "epoch": 0.22882919827762968, "grad_norm": 1.4145610332489014, "learning_rate": 1.7999437973276452e-05, "loss": 1.0177, "step": 1116 }, { "epoch": 0.22903424236210784, "grad_norm": 1.2649195194244385, "learning_rate": 1.799545060449851e-05, "loss": 0.9015, "step": 1117 }, { "epoch": 0.229239286446586, "grad_norm": 1.2765356302261353, "learning_rate": 1.7991459708599293e-05, "loss": 0.9658, "step": 1118 }, { "epoch": 0.22944433053106417, "grad_norm": 1.4232174158096313, "learning_rate": 1.798746528733935e-05, "loss": 1.0174, "step": 1119 }, { "epoch": 0.22964937461554233, "grad_norm": 1.4284048080444336, "learning_rate": 1.7983467342480782e-05, "loss": 1.0207, "step": 1120 }, { "epoch": 0.2298544187000205, "grad_norm": 1.5101372003555298, "learning_rate": 1.7979465875787246e-05, "loss": 1.019, "step": 1121 }, { "epoch": 0.23005946278449868, "grad_norm": 1.3825957775115967, "learning_rate": 1.797546088902396e-05, "loss": 0.9925, "step": 1122 }, { "epoch": 0.23026450686897684, "grad_norm": 1.3675068616867065, "learning_rate": 1.7971452383957678e-05, "loss": 1.0137, "step": 1123 }, { "epoch": 0.230469550953455, "grad_norm": 1.3156507015228271, "learning_rate": 1.796744036235672e-05, "loss": 1.0287, "step": 1124 }, { "epoch": 0.23067459503793317, "grad_norm": 1.1828818321228027, "learning_rate": 1.7963424825990955e-05, "loss": 0.8513, "step": 1125 }, { "epoch": 0.23087963912241133, "grad_norm": 1.410086989402771, "learning_rate": 1.79594057766318e-05, "loss": 0.9747, "step": 1126 }, { "epoch": 0.2310846832068895, "grad_norm": 1.5019898414611816, "learning_rate": 1.7955383216052224e-05, "loss": 1.0004, "step": 1127 }, { "epoch": 0.23128972729136765, "grad_norm": 1.259259819984436, "learning_rate": 1.7951357146026737e-05, "loss": 1.0065, "step": 1128 }, { "epoch": 0.23149477137584581, "grad_norm": 1.3384006023406982, "learning_rate": 1.794732756833141e-05, "loss": 1.0254, "step": 1129 }, { "epoch": 0.23169981546032398, "grad_norm": 1.2870278358459473, "learning_rate": 1.7943294484743856e-05, "loss": 0.9837, "step": 1130 }, { "epoch": 0.23190485954480214, "grad_norm": 1.4222524166107178, "learning_rate": 1.793925789704323e-05, "loss": 0.9877, "step": 1131 }, { "epoch": 0.2321099036292803, "grad_norm": 1.2657005786895752, "learning_rate": 1.7935217807010238e-05, "loss": 0.9923, "step": 1132 }, { "epoch": 0.23231494771375846, "grad_norm": 1.5321449041366577, "learning_rate": 1.793117421642713e-05, "loss": 1.1221, "step": 1133 }, { "epoch": 0.23251999179823662, "grad_norm": 1.4235327243804932, "learning_rate": 1.7927127127077697e-05, "loss": 0.9548, "step": 1134 }, { "epoch": 0.2327250358827148, "grad_norm": 1.278977632522583, "learning_rate": 1.792307654074728e-05, "loss": 0.9624, "step": 1135 }, { "epoch": 0.23293007996719295, "grad_norm": 1.3382147550582886, "learning_rate": 1.7919022459222754e-05, "loss": 1.0312, "step": 1136 }, { "epoch": 0.2331351240516711, "grad_norm": 1.1918971538543701, "learning_rate": 1.7914964884292543e-05, "loss": 1.0068, "step": 1137 }, { "epoch": 0.23334016813614927, "grad_norm": 1.3171131610870361, "learning_rate": 1.791090381774661e-05, "loss": 0.9103, "step": 1138 }, { "epoch": 0.23354521222062744, "grad_norm": 1.2157459259033203, "learning_rate": 1.7906839261376457e-05, "loss": 0.9421, "step": 1139 }, { "epoch": 0.2337502563051056, "grad_norm": 1.3134465217590332, "learning_rate": 1.790277121697513e-05, "loss": 1.014, "step": 1140 }, { "epoch": 0.23395530038958376, "grad_norm": 1.3561480045318604, "learning_rate": 1.789869968633721e-05, "loss": 0.9886, "step": 1141 }, { "epoch": 0.23416034447406192, "grad_norm": 1.384299397468567, "learning_rate": 1.7894624671258813e-05, "loss": 0.9665, "step": 1142 }, { "epoch": 0.23436538855854008, "grad_norm": 1.6037483215332031, "learning_rate": 1.7890546173537596e-05, "loss": 1.0401, "step": 1143 }, { "epoch": 0.23457043264301825, "grad_norm": 1.4509960412979126, "learning_rate": 1.788646419497275e-05, "loss": 0.9665, "step": 1144 }, { "epoch": 0.2347754767274964, "grad_norm": 1.5215336084365845, "learning_rate": 1.7882378737365006e-05, "loss": 1.0477, "step": 1145 }, { "epoch": 0.23498052081197457, "grad_norm": 1.2776111364364624, "learning_rate": 1.787828980251663e-05, "loss": 0.952, "step": 1146 }, { "epoch": 0.23518556489645273, "grad_norm": 1.39557683467865, "learning_rate": 1.7874197392231414e-05, "loss": 0.9993, "step": 1147 }, { "epoch": 0.2353906089809309, "grad_norm": 1.3639154434204102, "learning_rate": 1.7870101508314686e-05, "loss": 0.9813, "step": 1148 }, { "epoch": 0.23559565306540906, "grad_norm": 1.3432292938232422, "learning_rate": 1.7866002152573318e-05, "loss": 0.8728, "step": 1149 }, { "epoch": 0.23580069714988722, "grad_norm": 1.4222241640090942, "learning_rate": 1.7861899326815692e-05, "loss": 0.9596, "step": 1150 }, { "epoch": 0.23600574123436538, "grad_norm": 1.3226814270019531, "learning_rate": 1.785779303285174e-05, "loss": 1.0159, "step": 1151 }, { "epoch": 0.23621078531884354, "grad_norm": 1.4062944650650024, "learning_rate": 1.7853683272492913e-05, "loss": 0.8611, "step": 1152 }, { "epoch": 0.2364158294033217, "grad_norm": 1.533667802810669, "learning_rate": 1.7849570047552203e-05, "loss": 0.974, "step": 1153 }, { "epoch": 0.23662087348779987, "grad_norm": 1.3417179584503174, "learning_rate": 1.784545335984411e-05, "loss": 0.9663, "step": 1154 }, { "epoch": 0.23682591757227803, "grad_norm": 1.3660147190093994, "learning_rate": 1.7841333211184685e-05, "loss": 1.0478, "step": 1155 }, { "epoch": 0.2370309616567562, "grad_norm": 1.571111798286438, "learning_rate": 1.7837209603391486e-05, "loss": 0.9053, "step": 1156 }, { "epoch": 0.23723600574123438, "grad_norm": 1.4019629955291748, "learning_rate": 1.7833082538283615e-05, "loss": 1.0641, "step": 1157 }, { "epoch": 0.23744104982571254, "grad_norm": 1.4322693347930908, "learning_rate": 1.782895201768168e-05, "loss": 0.9596, "step": 1158 }, { "epoch": 0.2376460939101907, "grad_norm": 1.4015074968338013, "learning_rate": 1.7824818043407828e-05, "loss": 0.9951, "step": 1159 }, { "epoch": 0.23785113799466887, "grad_norm": 1.3007038831710815, "learning_rate": 1.7820680617285725e-05, "loss": 1.0194, "step": 1160 }, { "epoch": 0.23805618207914703, "grad_norm": 1.4315872192382812, "learning_rate": 1.7816539741140555e-05, "loss": 0.8777, "step": 1161 }, { "epoch": 0.2382612261636252, "grad_norm": 1.4326422214508057, "learning_rate": 1.7812395416799034e-05, "loss": 1.0121, "step": 1162 }, { "epoch": 0.23846627024810335, "grad_norm": 1.3019133806228638, "learning_rate": 1.780824764608939e-05, "loss": 0.9917, "step": 1163 }, { "epoch": 0.2386713143325815, "grad_norm": 1.2830679416656494, "learning_rate": 1.7804096430841374e-05, "loss": 0.9912, "step": 1164 }, { "epoch": 0.23887635841705968, "grad_norm": 1.432478904724121, "learning_rate": 1.7799941772886266e-05, "loss": 1.016, "step": 1165 }, { "epoch": 0.23908140250153784, "grad_norm": 1.270020604133606, "learning_rate": 1.7795783674056842e-05, "loss": 0.9883, "step": 1166 }, { "epoch": 0.239286446586016, "grad_norm": 1.271995186805725, "learning_rate": 1.7791622136187422e-05, "loss": 0.9834, "step": 1167 }, { "epoch": 0.23949149067049416, "grad_norm": 1.4009344577789307, "learning_rate": 1.7787457161113826e-05, "loss": 0.9343, "step": 1168 }, { "epoch": 0.23969653475497232, "grad_norm": 1.3862451314926147, "learning_rate": 1.7783288750673392e-05, "loss": 0.9529, "step": 1169 }, { "epoch": 0.23990157883945049, "grad_norm": 1.3066648244857788, "learning_rate": 1.7779116906704986e-05, "loss": 0.9692, "step": 1170 }, { "epoch": 0.24010662292392865, "grad_norm": 1.3642100095748901, "learning_rate": 1.7774941631048972e-05, "loss": 0.9774, "step": 1171 }, { "epoch": 0.2403116670084068, "grad_norm": 1.2857609987258911, "learning_rate": 1.7770762925547235e-05, "loss": 0.9303, "step": 1172 }, { "epoch": 0.24051671109288497, "grad_norm": 1.4287478923797607, "learning_rate": 1.7766580792043178e-05, "loss": 0.9265, "step": 1173 }, { "epoch": 0.24072175517736313, "grad_norm": 1.4198768138885498, "learning_rate": 1.776239523238171e-05, "loss": 0.9682, "step": 1174 }, { "epoch": 0.2409267992618413, "grad_norm": 1.2956006526947021, "learning_rate": 1.7758206248409253e-05, "loss": 0.9059, "step": 1175 }, { "epoch": 0.24113184334631946, "grad_norm": 1.400413155555725, "learning_rate": 1.775401384197374e-05, "loss": 1.0067, "step": 1176 }, { "epoch": 0.24133688743079762, "grad_norm": 1.3259800672531128, "learning_rate": 1.7749818014924612e-05, "loss": 0.9409, "step": 1177 }, { "epoch": 0.24154193151527578, "grad_norm": 1.4625519514083862, "learning_rate": 1.774561876911282e-05, "loss": 1.0155, "step": 1178 }, { "epoch": 0.24174697559975394, "grad_norm": 1.5114938020706177, "learning_rate": 1.7741416106390828e-05, "loss": 1.0486, "step": 1179 }, { "epoch": 0.2419520196842321, "grad_norm": 1.2968454360961914, "learning_rate": 1.7737210028612594e-05, "loss": 0.9327, "step": 1180 }, { "epoch": 0.24215706376871027, "grad_norm": 1.323199987411499, "learning_rate": 1.7733000537633605e-05, "loss": 0.9555, "step": 1181 }, { "epoch": 0.24236210785318843, "grad_norm": 1.4545506238937378, "learning_rate": 1.7728787635310828e-05, "loss": 0.9348, "step": 1182 }, { "epoch": 0.2425671519376666, "grad_norm": 1.308770775794983, "learning_rate": 1.772457132350275e-05, "loss": 1.0087, "step": 1183 }, { "epoch": 0.24277219602214475, "grad_norm": 1.3609057664871216, "learning_rate": 1.7720351604069363e-05, "loss": 0.9606, "step": 1184 }, { "epoch": 0.24297724010662292, "grad_norm": 1.2854371070861816, "learning_rate": 1.7716128478872155e-05, "loss": 1.0037, "step": 1185 }, { "epoch": 0.24318228419110108, "grad_norm": 1.3135857582092285, "learning_rate": 1.7711901949774125e-05, "loss": 1.0594, "step": 1186 }, { "epoch": 0.24338732827557924, "grad_norm": 1.2885499000549316, "learning_rate": 1.770767201863976e-05, "loss": 0.9768, "step": 1187 }, { "epoch": 0.2435923723600574, "grad_norm": 1.3479052782058716, "learning_rate": 1.770343868733506e-05, "loss": 0.9716, "step": 1188 }, { "epoch": 0.24379741644453556, "grad_norm": 1.3237407207489014, "learning_rate": 1.7699201957727523e-05, "loss": 1.0756, "step": 1189 }, { "epoch": 0.24400246052901373, "grad_norm": 1.2614693641662598, "learning_rate": 1.7694961831686143e-05, "loss": 1.0198, "step": 1190 }, { "epoch": 0.2442075046134919, "grad_norm": 1.2460771799087524, "learning_rate": 1.769071831108141e-05, "loss": 0.9607, "step": 1191 }, { "epoch": 0.24441254869797005, "grad_norm": 1.3878506422042847, "learning_rate": 1.7686471397785322e-05, "loss": 0.9968, "step": 1192 }, { "epoch": 0.24461759278244824, "grad_norm": 1.2746821641921997, "learning_rate": 1.768222109367136e-05, "loss": 0.9739, "step": 1193 }, { "epoch": 0.2448226368669264, "grad_norm": 1.222460150718689, "learning_rate": 1.7677967400614514e-05, "loss": 0.9546, "step": 1194 }, { "epoch": 0.24502768095140456, "grad_norm": 1.4199787378311157, "learning_rate": 1.7673710320491257e-05, "loss": 0.9987, "step": 1195 }, { "epoch": 0.24523272503588273, "grad_norm": 1.4119070768356323, "learning_rate": 1.7669449855179562e-05, "loss": 0.9656, "step": 1196 }, { "epoch": 0.2454377691203609, "grad_norm": 1.359210729598999, "learning_rate": 1.76651860065589e-05, "loss": 0.9964, "step": 1197 }, { "epoch": 0.24564281320483905, "grad_norm": 1.3219720125198364, "learning_rate": 1.7660918776510226e-05, "loss": 0.9898, "step": 1198 }, { "epoch": 0.2458478572893172, "grad_norm": 1.30588698387146, "learning_rate": 1.7656648166915985e-05, "loss": 0.9378, "step": 1199 }, { "epoch": 0.24605290137379537, "grad_norm": 1.327968716621399, "learning_rate": 1.765237417966013e-05, "loss": 1.0176, "step": 1200 }, { "epoch": 0.24625794545827354, "grad_norm": 1.2568206787109375, "learning_rate": 1.7648096816628083e-05, "loss": 0.9594, "step": 1201 }, { "epoch": 0.2464629895427517, "grad_norm": 1.2831676006317139, "learning_rate": 1.764381607970677e-05, "loss": 0.9696, "step": 1202 }, { "epoch": 0.24666803362722986, "grad_norm": 1.2851057052612305, "learning_rate": 1.7639531970784594e-05, "loss": 1.0053, "step": 1203 }, { "epoch": 0.24687307771170802, "grad_norm": 1.3465584516525269, "learning_rate": 1.763524449175145e-05, "loss": 1.0256, "step": 1204 }, { "epoch": 0.24707812179618618, "grad_norm": 1.3543204069137573, "learning_rate": 1.763095364449873e-05, "loss": 0.937, "step": 1205 }, { "epoch": 0.24728316588066435, "grad_norm": 1.3626004457473755, "learning_rate": 1.7626659430919297e-05, "loss": 0.8876, "step": 1206 }, { "epoch": 0.2474882099651425, "grad_norm": 1.3356521129608154, "learning_rate": 1.7622361852907506e-05, "loss": 0.93, "step": 1207 }, { "epoch": 0.24769325404962067, "grad_norm": 1.2878892421722412, "learning_rate": 1.7618060912359188e-05, "loss": 1.0423, "step": 1208 }, { "epoch": 0.24789829813409883, "grad_norm": 1.3041411638259888, "learning_rate": 1.761375661117168e-05, "loss": 0.9839, "step": 1209 }, { "epoch": 0.248103342218577, "grad_norm": 1.25388765335083, "learning_rate": 1.760944895124377e-05, "loss": 0.92, "step": 1210 }, { "epoch": 0.24830838630305516, "grad_norm": 1.2990968227386475, "learning_rate": 1.7605137934475753e-05, "loss": 0.9367, "step": 1211 }, { "epoch": 0.24851343038753332, "grad_norm": 1.5356906652450562, "learning_rate": 1.760082356276939e-05, "loss": 0.8949, "step": 1212 }, { "epoch": 0.24871847447201148, "grad_norm": 1.3508087396621704, "learning_rate": 1.759650583802793e-05, "loss": 1.0152, "step": 1213 }, { "epoch": 0.24892351855648964, "grad_norm": 1.4486762285232544, "learning_rate": 1.75921847621561e-05, "loss": 1.0016, "step": 1214 }, { "epoch": 0.2491285626409678, "grad_norm": 1.365708589553833, "learning_rate": 1.75878603370601e-05, "loss": 0.9456, "step": 1215 }, { "epoch": 0.24933360672544597, "grad_norm": 1.3795292377471924, "learning_rate": 1.7583532564647615e-05, "loss": 1.0574, "step": 1216 }, { "epoch": 0.24953865080992413, "grad_norm": 1.2984440326690674, "learning_rate": 1.75792014468278e-05, "loss": 0.9924, "step": 1217 }, { "epoch": 0.2497436948944023, "grad_norm": 1.3369790315628052, "learning_rate": 1.757486698551129e-05, "loss": 1.0312, "step": 1218 }, { "epoch": 0.24994873897888045, "grad_norm": 1.3120676279067993, "learning_rate": 1.7570529182610197e-05, "loss": 1.0012, "step": 1219 }, { "epoch": 0.2501537830633586, "grad_norm": 1.309757947921753, "learning_rate": 1.7566188040038094e-05, "loss": 0.9614, "step": 1220 }, { "epoch": 0.2503588271478368, "grad_norm": 1.2446894645690918, "learning_rate": 1.756184355971005e-05, "loss": 1.0105, "step": 1221 }, { "epoch": 0.25056387123231494, "grad_norm": 1.3270683288574219, "learning_rate": 1.7557495743542586e-05, "loss": 0.994, "step": 1222 }, { "epoch": 0.25076891531679313, "grad_norm": 1.3856613636016846, "learning_rate": 1.7553144593453702e-05, "loss": 0.9678, "step": 1223 }, { "epoch": 0.25097395940127126, "grad_norm": 1.3544923067092896, "learning_rate": 1.7548790111362867e-05, "loss": 0.929, "step": 1224 }, { "epoch": 0.25117900348574945, "grad_norm": 1.3870161771774292, "learning_rate": 1.754443229919103e-05, "loss": 0.9863, "step": 1225 }, { "epoch": 0.2513840475702276, "grad_norm": 1.2445118427276611, "learning_rate": 1.754007115886059e-05, "loss": 0.9366, "step": 1226 }, { "epoch": 0.2515890916547058, "grad_norm": 1.2849295139312744, "learning_rate": 1.7535706692295436e-05, "loss": 0.9257, "step": 1227 }, { "epoch": 0.2517941357391839, "grad_norm": 1.2992714643478394, "learning_rate": 1.7531338901420906e-05, "loss": 0.874, "step": 1228 }, { "epoch": 0.2519991798236621, "grad_norm": 1.3045486211776733, "learning_rate": 1.752696778816381e-05, "loss": 1.0324, "step": 1229 }, { "epoch": 0.25220422390814023, "grad_norm": 1.333611249923706, "learning_rate": 1.7522593354452434e-05, "loss": 0.9972, "step": 1230 }, { "epoch": 0.2524092679926184, "grad_norm": 1.4056370258331299, "learning_rate": 1.7518215602216514e-05, "loss": 1.0457, "step": 1231 }, { "epoch": 0.25261431207709656, "grad_norm": 1.3610551357269287, "learning_rate": 1.7513834533387256e-05, "loss": 0.9178, "step": 1232 }, { "epoch": 0.25281935616157475, "grad_norm": 1.3668875694274902, "learning_rate": 1.7509450149897326e-05, "loss": 1.0478, "step": 1233 }, { "epoch": 0.2530244002460529, "grad_norm": 1.3268946409225464, "learning_rate": 1.7505062453680864e-05, "loss": 1.0505, "step": 1234 }, { "epoch": 0.2532294443305311, "grad_norm": 1.2658952474594116, "learning_rate": 1.7500671446673457e-05, "loss": 1.0575, "step": 1235 }, { "epoch": 0.2534344884150092, "grad_norm": 1.4093130826950073, "learning_rate": 1.749627713081216e-05, "loss": 0.8716, "step": 1236 }, { "epoch": 0.2536395324994874, "grad_norm": 1.2878867387771606, "learning_rate": 1.7491879508035488e-05, "loss": 0.9773, "step": 1237 }, { "epoch": 0.25384457658396553, "grad_norm": 1.2680344581604004, "learning_rate": 1.7487478580283412e-05, "loss": 0.9311, "step": 1238 }, { "epoch": 0.2540496206684437, "grad_norm": 1.2692151069641113, "learning_rate": 1.7483074349497355e-05, "loss": 0.968, "step": 1239 }, { "epoch": 0.25425466475292186, "grad_norm": 1.374226689338684, "learning_rate": 1.7478666817620212e-05, "loss": 0.959, "step": 1240 }, { "epoch": 0.25445970883740004, "grad_norm": 1.4501501321792603, "learning_rate": 1.7474255986596323e-05, "loss": 1.064, "step": 1241 }, { "epoch": 0.2546647529218782, "grad_norm": 1.3463739156723022, "learning_rate": 1.746984185837149e-05, "loss": 0.984, "step": 1242 }, { "epoch": 0.25486979700635637, "grad_norm": 1.3961005210876465, "learning_rate": 1.746542443489296e-05, "loss": 0.9543, "step": 1243 }, { "epoch": 0.2550748410908345, "grad_norm": 1.3604553937911987, "learning_rate": 1.7461003718109443e-05, "loss": 1.0022, "step": 1244 }, { "epoch": 0.2552798851753127, "grad_norm": 1.3335989713668823, "learning_rate": 1.7456579709971094e-05, "loss": 0.9694, "step": 1245 }, { "epoch": 0.2554849292597909, "grad_norm": 1.4115982055664062, "learning_rate": 1.7452152412429535e-05, "loss": 1.0871, "step": 1246 }, { "epoch": 0.255689973344269, "grad_norm": 1.4088658094406128, "learning_rate": 1.744772182743782e-05, "loss": 1.0043, "step": 1247 }, { "epoch": 0.2558950174287472, "grad_norm": 1.366451382637024, "learning_rate": 1.7443287956950462e-05, "loss": 1.0087, "step": 1248 }, { "epoch": 0.25610006151322534, "grad_norm": 1.399969458580017, "learning_rate": 1.7438850802923432e-05, "loss": 0.9826, "step": 1249 }, { "epoch": 0.25630510559770353, "grad_norm": 1.3366334438323975, "learning_rate": 1.743441036731413e-05, "loss": 0.9625, "step": 1250 }, { "epoch": 0.25651014968218167, "grad_norm": 1.3868294954299927, "learning_rate": 1.7429966652081414e-05, "loss": 1.0226, "step": 1251 }, { "epoch": 0.25671519376665985, "grad_norm": 1.3871575593948364, "learning_rate": 1.7425519659185596e-05, "loss": 0.9788, "step": 1252 }, { "epoch": 0.256920237851138, "grad_norm": 1.3940293788909912, "learning_rate": 1.742106939058843e-05, "loss": 0.9888, "step": 1253 }, { "epoch": 0.2571252819356162, "grad_norm": 1.3332473039627075, "learning_rate": 1.74166158482531e-05, "loss": 1.0065, "step": 1254 }, { "epoch": 0.2573303260200943, "grad_norm": 1.3337465524673462, "learning_rate": 1.7412159034144258e-05, "loss": 1.0034, "step": 1255 }, { "epoch": 0.2575353701045725, "grad_norm": 1.295658826828003, "learning_rate": 1.7407698950227975e-05, "loss": 0.8841, "step": 1256 }, { "epoch": 0.25774041418905064, "grad_norm": 1.4128228425979614, "learning_rate": 1.740323559847179e-05, "loss": 0.8953, "step": 1257 }, { "epoch": 0.2579454582735288, "grad_norm": 1.287665605545044, "learning_rate": 1.7398768980844664e-05, "loss": 1.0638, "step": 1258 }, { "epoch": 0.25815050235800696, "grad_norm": 1.3261339664459229, "learning_rate": 1.7394299099317003e-05, "loss": 1.0683, "step": 1259 }, { "epoch": 0.25835554644248515, "grad_norm": 1.3048585653305054, "learning_rate": 1.738982595586066e-05, "loss": 1.0142, "step": 1260 }, { "epoch": 0.2585605905269633, "grad_norm": 1.2331238985061646, "learning_rate": 1.738534955244892e-05, "loss": 0.9934, "step": 1261 }, { "epoch": 0.2587656346114415, "grad_norm": 1.2985320091247559, "learning_rate": 1.738086989105651e-05, "loss": 0.8827, "step": 1262 }, { "epoch": 0.2589706786959196, "grad_norm": 1.2920548915863037, "learning_rate": 1.737638697365959e-05, "loss": 0.9337, "step": 1263 }, { "epoch": 0.2591757227803978, "grad_norm": 1.342658281326294, "learning_rate": 1.737190080223576e-05, "loss": 1.0186, "step": 1264 }, { "epoch": 0.25938076686487593, "grad_norm": 1.387194037437439, "learning_rate": 1.736741137876405e-05, "loss": 0.9885, "step": 1265 }, { "epoch": 0.2595858109493541, "grad_norm": 1.337889552116394, "learning_rate": 1.736291870522494e-05, "loss": 0.9574, "step": 1266 }, { "epoch": 0.25979085503383226, "grad_norm": 1.2985119819641113, "learning_rate": 1.735842278360032e-05, "loss": 1.0124, "step": 1267 }, { "epoch": 0.25999589911831045, "grad_norm": 1.314538598060608, "learning_rate": 1.735392361587353e-05, "loss": 1.0095, "step": 1268 }, { "epoch": 0.2602009432027886, "grad_norm": 1.3063316345214844, "learning_rate": 1.7349421204029343e-05, "loss": 0.9534, "step": 1269 }, { "epoch": 0.26040598728726677, "grad_norm": 1.2660037279129028, "learning_rate": 1.7344915550053948e-05, "loss": 0.9404, "step": 1270 }, { "epoch": 0.2606110313717449, "grad_norm": 1.3835657835006714, "learning_rate": 1.734040665593498e-05, "loss": 1.0033, "step": 1271 }, { "epoch": 0.2608160754562231, "grad_norm": 1.3241575956344604, "learning_rate": 1.73358945236615e-05, "loss": 1.0079, "step": 1272 }, { "epoch": 0.26102111954070123, "grad_norm": 1.2982946634292603, "learning_rate": 1.7331379155223986e-05, "loss": 1.0225, "step": 1273 }, { "epoch": 0.2612261636251794, "grad_norm": 1.2507314682006836, "learning_rate": 1.7326860552614357e-05, "loss": 0.9051, "step": 1274 }, { "epoch": 0.26143120770965755, "grad_norm": 1.4296984672546387, "learning_rate": 1.7322338717825954e-05, "loss": 0.9593, "step": 1275 }, { "epoch": 0.26163625179413574, "grad_norm": 1.3716654777526855, "learning_rate": 1.7317813652853545e-05, "loss": 1.0026, "step": 1276 }, { "epoch": 0.2618412958786139, "grad_norm": 1.2745046615600586, "learning_rate": 1.7313285359693322e-05, "loss": 0.9647, "step": 1277 }, { "epoch": 0.26204633996309207, "grad_norm": 1.3447949886322021, "learning_rate": 1.7308753840342895e-05, "loss": 0.9765, "step": 1278 }, { "epoch": 0.2622513840475702, "grad_norm": 1.2614903450012207, "learning_rate": 1.7304219096801312e-05, "loss": 0.9174, "step": 1279 }, { "epoch": 0.2624564281320484, "grad_norm": 1.268784523010254, "learning_rate": 1.7299681131069026e-05, "loss": 1.0212, "step": 1280 }, { "epoch": 0.2626614722165265, "grad_norm": 1.3475351333618164, "learning_rate": 1.7295139945147924e-05, "loss": 1.0377, "step": 1281 }, { "epoch": 0.2628665163010047, "grad_norm": 1.2196855545043945, "learning_rate": 1.7290595541041312e-05, "loss": 0.9618, "step": 1282 }, { "epoch": 0.2630715603854829, "grad_norm": 1.2512342929840088, "learning_rate": 1.7286047920753906e-05, "loss": 0.9747, "step": 1283 }, { "epoch": 0.26327660446996104, "grad_norm": 1.280278205871582, "learning_rate": 1.7281497086291853e-05, "loss": 0.9013, "step": 1284 }, { "epoch": 0.26348164855443923, "grad_norm": 1.3960342407226562, "learning_rate": 1.7276943039662716e-05, "loss": 0.9653, "step": 1285 }, { "epoch": 0.26368669263891736, "grad_norm": 1.2818423509597778, "learning_rate": 1.7272385782875463e-05, "loss": 0.9418, "step": 1286 }, { "epoch": 0.26389173672339555, "grad_norm": 1.3453987836837769, "learning_rate": 1.7267825317940494e-05, "loss": 1.003, "step": 1287 }, { "epoch": 0.2640967808078737, "grad_norm": 1.3792123794555664, "learning_rate": 1.7263261646869614e-05, "loss": 0.96, "step": 1288 }, { "epoch": 0.2643018248923519, "grad_norm": 1.4088505506515503, "learning_rate": 1.7258694771676046e-05, "loss": 0.9992, "step": 1289 }, { "epoch": 0.26450686897683, "grad_norm": 1.3352845907211304, "learning_rate": 1.725412469437443e-05, "loss": 0.9719, "step": 1290 }, { "epoch": 0.2647119130613082, "grad_norm": 1.309144377708435, "learning_rate": 1.7249551416980806e-05, "loss": 1.015, "step": 1291 }, { "epoch": 0.26491695714578634, "grad_norm": 1.3023802042007446, "learning_rate": 1.724497494151264e-05, "loss": 0.9971, "step": 1292 }, { "epoch": 0.2651220012302645, "grad_norm": 1.229957103729248, "learning_rate": 1.7240395269988802e-05, "loss": 0.9751, "step": 1293 }, { "epoch": 0.26532704531474266, "grad_norm": 1.3113174438476562, "learning_rate": 1.7235812404429574e-05, "loss": 0.9791, "step": 1294 }, { "epoch": 0.26553208939922085, "grad_norm": 1.3432365655899048, "learning_rate": 1.723122634685664e-05, "loss": 0.9602, "step": 1295 }, { "epoch": 0.265737133483699, "grad_norm": 1.3533802032470703, "learning_rate": 1.7226637099293112e-05, "loss": 1.0308, "step": 1296 }, { "epoch": 0.2659421775681772, "grad_norm": 1.405342698097229, "learning_rate": 1.7222044663763484e-05, "loss": 0.8921, "step": 1297 }, { "epoch": 0.2661472216526553, "grad_norm": 1.3072021007537842, "learning_rate": 1.721744904229367e-05, "loss": 0.9618, "step": 1298 }, { "epoch": 0.2663522657371335, "grad_norm": 1.2155561447143555, "learning_rate": 1.7212850236910984e-05, "loss": 0.9818, "step": 1299 }, { "epoch": 0.26655730982161163, "grad_norm": 1.3681777715682983, "learning_rate": 1.7208248249644154e-05, "loss": 0.9611, "step": 1300 }, { "epoch": 0.2667623539060898, "grad_norm": 1.2869932651519775, "learning_rate": 1.7203643082523306e-05, "loss": 0.9441, "step": 1301 }, { "epoch": 0.26696739799056796, "grad_norm": 1.3332700729370117, "learning_rate": 1.7199034737579962e-05, "loss": 1.0451, "step": 1302 }, { "epoch": 0.26717244207504615, "grad_norm": 1.2755937576293945, "learning_rate": 1.7194423216847054e-05, "loss": 1.0066, "step": 1303 }, { "epoch": 0.2673774861595243, "grad_norm": 1.3365075588226318, "learning_rate": 1.7189808522358914e-05, "loss": 0.9098, "step": 1304 }, { "epoch": 0.26758253024400247, "grad_norm": 1.2974265813827515, "learning_rate": 1.7185190656151273e-05, "loss": 0.9938, "step": 1305 }, { "epoch": 0.2677875743284806, "grad_norm": 1.3056514263153076, "learning_rate": 1.718056962026126e-05, "loss": 0.9424, "step": 1306 }, { "epoch": 0.2679926184129588, "grad_norm": 1.376193881034851, "learning_rate": 1.7175945416727405e-05, "loss": 0.9687, "step": 1307 }, { "epoch": 0.26819766249743693, "grad_norm": 1.4266325235366821, "learning_rate": 1.7171318047589637e-05, "loss": 0.9808, "step": 1308 }, { "epoch": 0.2684027065819151, "grad_norm": 1.313012719154358, "learning_rate": 1.7166687514889272e-05, "loss": 0.9141, "step": 1309 }, { "epoch": 0.26860775066639325, "grad_norm": 1.376467227935791, "learning_rate": 1.716205382066903e-05, "loss": 1.0262, "step": 1310 }, { "epoch": 0.26881279475087144, "grad_norm": 1.270392894744873, "learning_rate": 1.7157416966973026e-05, "loss": 1.0228, "step": 1311 }, { "epoch": 0.2690178388353496, "grad_norm": 1.3815577030181885, "learning_rate": 1.7152776955846768e-05, "loss": 1.0546, "step": 1312 }, { "epoch": 0.26922288291982777, "grad_norm": 1.3759782314300537, "learning_rate": 1.7148133789337145e-05, "loss": 0.9561, "step": 1313 }, { "epoch": 0.2694279270043059, "grad_norm": 1.2949576377868652, "learning_rate": 1.714348746949246e-05, "loss": 0.9819, "step": 1314 }, { "epoch": 0.2696329710887841, "grad_norm": 1.2790327072143555, "learning_rate": 1.7138837998362387e-05, "loss": 1.0337, "step": 1315 }, { "epoch": 0.2698380151732622, "grad_norm": 1.2890175580978394, "learning_rate": 1.7134185377998006e-05, "loss": 0.998, "step": 1316 }, { "epoch": 0.2700430592577404, "grad_norm": 1.36770761013031, "learning_rate": 1.7129529610451775e-05, "loss": 0.9869, "step": 1317 }, { "epoch": 0.2702481033422186, "grad_norm": 1.2688466310501099, "learning_rate": 1.7124870697777543e-05, "loss": 0.9818, "step": 1318 }, { "epoch": 0.27045314742669674, "grad_norm": 1.221932053565979, "learning_rate": 1.7120208642030548e-05, "loss": 0.975, "step": 1319 }, { "epoch": 0.27065819151117493, "grad_norm": 1.389574646949768, "learning_rate": 1.7115543445267412e-05, "loss": 0.9472, "step": 1320 }, { "epoch": 0.27086323559565306, "grad_norm": 1.3236939907073975, "learning_rate": 1.711087510954615e-05, "loss": 0.953, "step": 1321 }, { "epoch": 0.27106827968013125, "grad_norm": 1.3710825443267822, "learning_rate": 1.7106203636926154e-05, "loss": 0.984, "step": 1322 }, { "epoch": 0.2712733237646094, "grad_norm": 1.2882845401763916, "learning_rate": 1.7101529029468196e-05, "loss": 0.9397, "step": 1323 }, { "epoch": 0.2714783678490876, "grad_norm": 1.3259525299072266, "learning_rate": 1.7096851289234448e-05, "loss": 0.9365, "step": 1324 }, { "epoch": 0.2716834119335657, "grad_norm": 1.324660301208496, "learning_rate": 1.7092170418288447e-05, "loss": 0.9873, "step": 1325 }, { "epoch": 0.2718884560180439, "grad_norm": 1.2142601013183594, "learning_rate": 1.7087486418695122e-05, "loss": 0.8993, "step": 1326 }, { "epoch": 0.27209350010252203, "grad_norm": 1.2864335775375366, "learning_rate": 1.7082799292520767e-05, "loss": 1.0089, "step": 1327 }, { "epoch": 0.2722985441870002, "grad_norm": 1.3344192504882812, "learning_rate": 1.7078109041833076e-05, "loss": 1.0034, "step": 1328 }, { "epoch": 0.27250358827147836, "grad_norm": 1.2756164073944092, "learning_rate": 1.7073415668701106e-05, "loss": 0.9676, "step": 1329 }, { "epoch": 0.27270863235595655, "grad_norm": 1.2819377183914185, "learning_rate": 1.7068719175195297e-05, "loss": 0.8881, "step": 1330 }, { "epoch": 0.2729136764404347, "grad_norm": 1.351406455039978, "learning_rate": 1.706401956338747e-05, "loss": 0.9469, "step": 1331 }, { "epoch": 0.2731187205249129, "grad_norm": 1.314427375793457, "learning_rate": 1.7059316835350806e-05, "loss": 1.0497, "step": 1332 }, { "epoch": 0.273323764609391, "grad_norm": 1.3263263702392578, "learning_rate": 1.705461099315988e-05, "loss": 0.988, "step": 1333 }, { "epoch": 0.2735288086938692, "grad_norm": 1.4199163913726807, "learning_rate": 1.704990203889063e-05, "loss": 1.0176, "step": 1334 }, { "epoch": 0.27373385277834733, "grad_norm": 1.2934964895248413, "learning_rate": 1.704518997462037e-05, "loss": 0.9428, "step": 1335 }, { "epoch": 0.2739388968628255, "grad_norm": 1.2900067567825317, "learning_rate": 1.7040474802427782e-05, "loss": 0.9717, "step": 1336 }, { "epoch": 0.27414394094730365, "grad_norm": 1.5042140483856201, "learning_rate": 1.7035756524392924e-05, "loss": 0.8628, "step": 1337 }, { "epoch": 0.27434898503178184, "grad_norm": 1.3870371580123901, "learning_rate": 1.703103514259722e-05, "loss": 0.9057, "step": 1338 }, { "epoch": 0.27455402911626, "grad_norm": 1.2911804914474487, "learning_rate": 1.7026310659123462e-05, "loss": 1.0426, "step": 1339 }, { "epoch": 0.27475907320073817, "grad_norm": 1.2812821865081787, "learning_rate": 1.7021583076055824e-05, "loss": 0.963, "step": 1340 }, { "epoch": 0.2749641172852163, "grad_norm": 1.219031572341919, "learning_rate": 1.7016852395479827e-05, "loss": 0.8789, "step": 1341 }, { "epoch": 0.2751691613696945, "grad_norm": 1.402260661125183, "learning_rate": 1.7012118619482376e-05, "loss": 1.0078, "step": 1342 }, { "epoch": 0.2753742054541726, "grad_norm": 1.2694871425628662, "learning_rate": 1.7007381750151725e-05, "loss": 1.0358, "step": 1343 }, { "epoch": 0.2755792495386508, "grad_norm": 1.2388606071472168, "learning_rate": 1.700264178957751e-05, "loss": 0.9406, "step": 1344 }, { "epoch": 0.27578429362312895, "grad_norm": 1.2639353275299072, "learning_rate": 1.699789873985072e-05, "loss": 1.0185, "step": 1345 }, { "epoch": 0.27598933770760714, "grad_norm": 1.2373020648956299, "learning_rate": 1.69931526030637e-05, "loss": 0.8351, "step": 1346 }, { "epoch": 0.2761943817920853, "grad_norm": 1.2686569690704346, "learning_rate": 1.6988403381310177e-05, "loss": 1.0404, "step": 1347 }, { "epoch": 0.27639942587656346, "grad_norm": 1.3242428302764893, "learning_rate": 1.6983651076685224e-05, "loss": 0.9882, "step": 1348 }, { "epoch": 0.2766044699610416, "grad_norm": 1.4029061794281006, "learning_rate": 1.6978895691285276e-05, "loss": 0.8984, "step": 1349 }, { "epoch": 0.2768095140455198, "grad_norm": 1.271247386932373, "learning_rate": 1.6974137227208126e-05, "loss": 0.9277, "step": 1350 }, { "epoch": 0.2770145581299979, "grad_norm": 1.2796318531036377, "learning_rate": 1.696937568655294e-05, "loss": 0.9791, "step": 1351 }, { "epoch": 0.2772196022144761, "grad_norm": 1.4134926795959473, "learning_rate": 1.696461107142021e-05, "loss": 1.042, "step": 1352 }, { "epoch": 0.2774246462989543, "grad_norm": 1.2789820432662964, "learning_rate": 1.6959843383911816e-05, "loss": 0.9932, "step": 1353 }, { "epoch": 0.27762969038343244, "grad_norm": 1.4225202798843384, "learning_rate": 1.6955072626130983e-05, "loss": 0.9203, "step": 1354 }, { "epoch": 0.2778347344679106, "grad_norm": 1.3571778535842896, "learning_rate": 1.6950298800182278e-05, "loss": 0.965, "step": 1355 }, { "epoch": 0.27803977855238876, "grad_norm": 1.3537254333496094, "learning_rate": 1.6945521908171633e-05, "loss": 0.9841, "step": 1356 }, { "epoch": 0.27824482263686695, "grad_norm": 1.3366835117340088, "learning_rate": 1.6940741952206342e-05, "loss": 0.9268, "step": 1357 }, { "epoch": 0.2784498667213451, "grad_norm": 1.2884011268615723, "learning_rate": 1.6935958934395028e-05, "loss": 0.9846, "step": 1358 }, { "epoch": 0.2786549108058233, "grad_norm": 1.204514741897583, "learning_rate": 1.693117285684768e-05, "loss": 0.852, "step": 1359 }, { "epoch": 0.2788599548903014, "grad_norm": 1.340244174003601, "learning_rate": 1.6926383721675633e-05, "loss": 1.0388, "step": 1360 }, { "epoch": 0.2790649989747796, "grad_norm": 1.3165150880813599, "learning_rate": 1.692159153099157e-05, "loss": 0.972, "step": 1361 }, { "epoch": 0.27927004305925773, "grad_norm": 1.3192639350891113, "learning_rate": 1.691679628690953e-05, "loss": 0.9353, "step": 1362 }, { "epoch": 0.2794750871437359, "grad_norm": 1.4905531406402588, "learning_rate": 1.691199799154488e-05, "loss": 0.8978, "step": 1363 }, { "epoch": 0.27968013122821406, "grad_norm": 1.3309801816940308, "learning_rate": 1.6907196647014355e-05, "loss": 0.977, "step": 1364 }, { "epoch": 0.27988517531269225, "grad_norm": 1.2463114261627197, "learning_rate": 1.6902392255436023e-05, "loss": 0.9541, "step": 1365 }, { "epoch": 0.2800902193971704, "grad_norm": 1.3308041095733643, "learning_rate": 1.689758481892929e-05, "loss": 0.9586, "step": 1366 }, { "epoch": 0.28029526348164857, "grad_norm": 1.4341139793395996, "learning_rate": 1.6892774339614927e-05, "loss": 1.0382, "step": 1367 }, { "epoch": 0.2805003075661267, "grad_norm": 1.2607753276824951, "learning_rate": 1.6887960819615025e-05, "loss": 0.9098, "step": 1368 }, { "epoch": 0.2807053516506049, "grad_norm": 1.3411458730697632, "learning_rate": 1.6883144261053025e-05, "loss": 0.9278, "step": 1369 }, { "epoch": 0.28091039573508303, "grad_norm": 1.3393460512161255, "learning_rate": 1.687832466605371e-05, "loss": 0.9897, "step": 1370 }, { "epoch": 0.2811154398195612, "grad_norm": 1.3347853422164917, "learning_rate": 1.68735020367432e-05, "loss": 1.0259, "step": 1371 }, { "epoch": 0.28132048390403935, "grad_norm": 1.3935307264328003, "learning_rate": 1.686867637524896e-05, "loss": 0.9917, "step": 1372 }, { "epoch": 0.28152552798851754, "grad_norm": 1.3527874946594238, "learning_rate": 1.6863847683699782e-05, "loss": 0.9484, "step": 1373 }, { "epoch": 0.2817305720729957, "grad_norm": 1.3423935174942017, "learning_rate": 1.6859015964225803e-05, "loss": 0.9842, "step": 1374 }, { "epoch": 0.28193561615747387, "grad_norm": 1.232836127281189, "learning_rate": 1.685418121895849e-05, "loss": 0.9735, "step": 1375 }, { "epoch": 0.282140660241952, "grad_norm": 1.2785344123840332, "learning_rate": 1.6849343450030647e-05, "loss": 0.9419, "step": 1376 }, { "epoch": 0.2823457043264302, "grad_norm": 1.2473357915878296, "learning_rate": 1.6844502659576414e-05, "loss": 0.9881, "step": 1377 }, { "epoch": 0.2825507484109083, "grad_norm": 1.3035774230957031, "learning_rate": 1.683965884973126e-05, "loss": 0.9769, "step": 1378 }, { "epoch": 0.2827557924953865, "grad_norm": 1.263505458831787, "learning_rate": 1.6834812022632e-05, "loss": 1.0269, "step": 1379 }, { "epoch": 0.28296083657986465, "grad_norm": 1.2671624422073364, "learning_rate": 1.6829962180416746e-05, "loss": 0.9655, "step": 1380 }, { "epoch": 0.28316588066434284, "grad_norm": 1.3503303527832031, "learning_rate": 1.6825109325224982e-05, "loss": 1.0012, "step": 1381 }, { "epoch": 0.283370924748821, "grad_norm": 1.3427320718765259, "learning_rate": 1.6820253459197493e-05, "loss": 0.9702, "step": 1382 }, { "epoch": 0.28357596883329916, "grad_norm": 1.3704482316970825, "learning_rate": 1.6815394584476405e-05, "loss": 0.9358, "step": 1383 }, { "epoch": 0.2837810129177773, "grad_norm": 1.3064242601394653, "learning_rate": 1.6810532703205162e-05, "loss": 1.0594, "step": 1384 }, { "epoch": 0.2839860570022555, "grad_norm": 1.3454461097717285, "learning_rate": 1.680566781752854e-05, "loss": 1.0115, "step": 1385 }, { "epoch": 0.2841911010867336, "grad_norm": 1.3547868728637695, "learning_rate": 1.6800799929592643e-05, "loss": 0.9677, "step": 1386 }, { "epoch": 0.2843961451712118, "grad_norm": 1.2845815420150757, "learning_rate": 1.679592904154489e-05, "loss": 0.9992, "step": 1387 }, { "epoch": 0.28460118925568995, "grad_norm": 1.3124490976333618, "learning_rate": 1.6791055155534043e-05, "loss": 0.9783, "step": 1388 }, { "epoch": 0.28480623334016814, "grad_norm": 1.3356611728668213, "learning_rate": 1.6786178273710157e-05, "loss": 1.0352, "step": 1389 }, { "epoch": 0.2850112774246463, "grad_norm": 1.2473483085632324, "learning_rate": 1.678129839822463e-05, "loss": 0.9407, "step": 1390 }, { "epoch": 0.28521632150912446, "grad_norm": 1.2640427350997925, "learning_rate": 1.6776415531230183e-05, "loss": 0.9758, "step": 1391 }, { "epoch": 0.28542136559360265, "grad_norm": 1.3218740224838257, "learning_rate": 1.677152967488084e-05, "loss": 0.938, "step": 1392 }, { "epoch": 0.2856264096780808, "grad_norm": 1.227939486503601, "learning_rate": 1.6766640831331954e-05, "loss": 0.8776, "step": 1393 }, { "epoch": 0.285831453762559, "grad_norm": 1.2798057794570923, "learning_rate": 1.6761749002740195e-05, "loss": 0.9812, "step": 1394 }, { "epoch": 0.2860364978470371, "grad_norm": 1.2832748889923096, "learning_rate": 1.6756854191263553e-05, "loss": 0.9939, "step": 1395 }, { "epoch": 0.2862415419315153, "grad_norm": 1.30195152759552, "learning_rate": 1.6751956399061326e-05, "loss": 0.9304, "step": 1396 }, { "epoch": 0.28644658601599343, "grad_norm": 1.429328441619873, "learning_rate": 1.6747055628294134e-05, "loss": 0.9646, "step": 1397 }, { "epoch": 0.2866516301004716, "grad_norm": 1.4002580642700195, "learning_rate": 1.6742151881123902e-05, "loss": 1.0078, "step": 1398 }, { "epoch": 0.28685667418494976, "grad_norm": 1.3551058769226074, "learning_rate": 1.6737245159713886e-05, "loss": 1.0799, "step": 1399 }, { "epoch": 0.28706171826942795, "grad_norm": 1.2874218225479126, "learning_rate": 1.6732335466228623e-05, "loss": 0.9659, "step": 1400 }, { "epoch": 0.2872667623539061, "grad_norm": 1.3892138004302979, "learning_rate": 1.6727422802834e-05, "loss": 1.0038, "step": 1401 }, { "epoch": 0.28747180643838427, "grad_norm": 1.2864927053451538, "learning_rate": 1.6722507171697184e-05, "loss": 0.9909, "step": 1402 }, { "epoch": 0.2876768505228624, "grad_norm": 1.422977089881897, "learning_rate": 1.6717588574986664e-05, "loss": 0.9523, "step": 1403 }, { "epoch": 0.2878818946073406, "grad_norm": 1.2748820781707764, "learning_rate": 1.6712667014872238e-05, "loss": 0.9111, "step": 1404 }, { "epoch": 0.28808693869181873, "grad_norm": 1.3871415853500366, "learning_rate": 1.6707742493524998e-05, "loss": 1.0119, "step": 1405 }, { "epoch": 0.2882919827762969, "grad_norm": 1.2031582593917847, "learning_rate": 1.6702815013117365e-05, "loss": 0.9337, "step": 1406 }, { "epoch": 0.28849702686077505, "grad_norm": 1.251855731010437, "learning_rate": 1.669788457582304e-05, "loss": 0.9549, "step": 1407 }, { "epoch": 0.28870207094525324, "grad_norm": 1.3836487531661987, "learning_rate": 1.6692951183817056e-05, "loss": 0.967, "step": 1408 }, { "epoch": 0.2889071150297314, "grad_norm": 1.4801326990127563, "learning_rate": 1.668801483927572e-05, "loss": 0.9957, "step": 1409 }, { "epoch": 0.28911215911420957, "grad_norm": 1.3599892854690552, "learning_rate": 1.668307554437667e-05, "loss": 1.0011, "step": 1410 }, { "epoch": 0.2893172031986877, "grad_norm": 1.2467292547225952, "learning_rate": 1.6678133301298817e-05, "loss": 0.9664, "step": 1411 }, { "epoch": 0.2895222472831659, "grad_norm": 1.2846544981002808, "learning_rate": 1.6673188112222394e-05, "loss": 0.9383, "step": 1412 }, { "epoch": 0.289727291367644, "grad_norm": 1.2812508344650269, "learning_rate": 1.666823997932893e-05, "loss": 0.9504, "step": 1413 }, { "epoch": 0.2899323354521222, "grad_norm": 1.3354895114898682, "learning_rate": 1.6663288904801245e-05, "loss": 0.9523, "step": 1414 }, { "epoch": 0.29013737953660035, "grad_norm": 1.2210520505905151, "learning_rate": 1.6658334890823465e-05, "loss": 1.0044, "step": 1415 }, { "epoch": 0.29034242362107854, "grad_norm": 1.3719216585159302, "learning_rate": 1.6653377939581006e-05, "loss": 0.9639, "step": 1416 }, { "epoch": 0.2905474677055567, "grad_norm": 1.404337763786316, "learning_rate": 1.6648418053260585e-05, "loss": 0.9181, "step": 1417 }, { "epoch": 0.29075251179003486, "grad_norm": 1.3233592510223389, "learning_rate": 1.6643455234050204e-05, "loss": 1.0361, "step": 1418 }, { "epoch": 0.290957555874513, "grad_norm": 1.3259671926498413, "learning_rate": 1.6638489484139174e-05, "loss": 0.9546, "step": 1419 }, { "epoch": 0.2911625999589912, "grad_norm": 1.2660515308380127, "learning_rate": 1.663352080571809e-05, "loss": 0.9815, "step": 1420 }, { "epoch": 0.2913676440434693, "grad_norm": 1.4272561073303223, "learning_rate": 1.6628549200978837e-05, "loss": 0.8654, "step": 1421 }, { "epoch": 0.2915726881279475, "grad_norm": 1.301689863204956, "learning_rate": 1.6623574672114596e-05, "loss": 0.9263, "step": 1422 }, { "epoch": 0.29177773221242564, "grad_norm": 1.2276722192764282, "learning_rate": 1.6618597221319835e-05, "loss": 0.9262, "step": 1423 }, { "epoch": 0.29198277629690383, "grad_norm": 1.29997980594635, "learning_rate": 1.6613616850790308e-05, "loss": 1.022, "step": 1424 }, { "epoch": 0.292187820381382, "grad_norm": 1.278609037399292, "learning_rate": 1.6608633562723068e-05, "loss": 0.9658, "step": 1425 }, { "epoch": 0.29239286446586016, "grad_norm": 1.2486791610717773, "learning_rate": 1.6603647359316445e-05, "loss": 1.0129, "step": 1426 }, { "epoch": 0.29259790855033835, "grad_norm": 1.424613118171692, "learning_rate": 1.6598658242770054e-05, "loss": 0.9787, "step": 1427 }, { "epoch": 0.2928029526348165, "grad_norm": 1.305044174194336, "learning_rate": 1.6593666215284808e-05, "loss": 0.8689, "step": 1428 }, { "epoch": 0.29300799671929467, "grad_norm": 1.1623188257217407, "learning_rate": 1.6588671279062882e-05, "loss": 0.9688, "step": 1429 }, { "epoch": 0.2932130408037728, "grad_norm": 1.2898266315460205, "learning_rate": 1.658367343630776e-05, "loss": 1.0373, "step": 1430 }, { "epoch": 0.293418084888251, "grad_norm": 1.3681561946868896, "learning_rate": 1.6578672689224188e-05, "loss": 0.9872, "step": 1431 }, { "epoch": 0.29362312897272913, "grad_norm": 1.3410718441009521, "learning_rate": 1.6573669040018202e-05, "loss": 1.0216, "step": 1432 }, { "epoch": 0.2938281730572073, "grad_norm": 1.347725749015808, "learning_rate": 1.6568662490897117e-05, "loss": 1.086, "step": 1433 }, { "epoch": 0.29403321714168545, "grad_norm": 1.2981255054473877, "learning_rate": 1.656365304406953e-05, "loss": 0.985, "step": 1434 }, { "epoch": 0.29423826122616364, "grad_norm": 1.3726893663406372, "learning_rate": 1.655864070174531e-05, "loss": 1.0051, "step": 1435 }, { "epoch": 0.2944433053106418, "grad_norm": 1.3644248247146606, "learning_rate": 1.655362546613561e-05, "loss": 0.9779, "step": 1436 }, { "epoch": 0.29464834939511997, "grad_norm": 1.2701478004455566, "learning_rate": 1.6548607339452853e-05, "loss": 1.0361, "step": 1437 }, { "epoch": 0.2948533934795981, "grad_norm": 1.2983769178390503, "learning_rate": 1.6543586323910742e-05, "loss": 1.0248, "step": 1438 }, { "epoch": 0.2950584375640763, "grad_norm": 1.2623478174209595, "learning_rate": 1.653856242172425e-05, "loss": 1.0146, "step": 1439 }, { "epoch": 0.2952634816485544, "grad_norm": 1.2573851346969604, "learning_rate": 1.6533535635109633e-05, "loss": 1.0005, "step": 1440 }, { "epoch": 0.2954685257330326, "grad_norm": 1.2470216751098633, "learning_rate": 1.652850596628441e-05, "loss": 0.9524, "step": 1441 }, { "epoch": 0.29567356981751075, "grad_norm": 1.227606177330017, "learning_rate": 1.652347341746737e-05, "loss": 0.9897, "step": 1442 }, { "epoch": 0.29587861390198894, "grad_norm": 1.3981770277023315, "learning_rate": 1.651843799087858e-05, "loss": 0.9663, "step": 1443 }, { "epoch": 0.2960836579864671, "grad_norm": 1.5922836065292358, "learning_rate": 1.6513399688739377e-05, "loss": 1.0634, "step": 1444 }, { "epoch": 0.29628870207094526, "grad_norm": 1.278095006942749, "learning_rate": 1.650835851327236e-05, "loss": 0.9177, "step": 1445 }, { "epoch": 0.2964937461554234, "grad_norm": 1.3035579919815063, "learning_rate": 1.650331446670139e-05, "loss": 0.9711, "step": 1446 }, { "epoch": 0.2966987902399016, "grad_norm": 1.3041365146636963, "learning_rate": 1.6498267551251618e-05, "loss": 1.0057, "step": 1447 }, { "epoch": 0.2969038343243797, "grad_norm": 1.2472461462020874, "learning_rate": 1.6493217769149433e-05, "loss": 0.9481, "step": 1448 }, { "epoch": 0.2971088784088579, "grad_norm": 1.3397608995437622, "learning_rate": 1.6488165122622507e-05, "loss": 1.0144, "step": 1449 }, { "epoch": 0.29731392249333605, "grad_norm": 1.1853246688842773, "learning_rate": 1.648310961389977e-05, "loss": 0.8874, "step": 1450 }, { "epoch": 0.29751896657781424, "grad_norm": 1.3308181762695312, "learning_rate": 1.647805124521141e-05, "loss": 0.968, "step": 1451 }, { "epoch": 0.29772401066229237, "grad_norm": 1.3046295642852783, "learning_rate": 1.6472990018788884e-05, "loss": 0.9221, "step": 1452 }, { "epoch": 0.29792905474677056, "grad_norm": 1.29019296169281, "learning_rate": 1.6467925936864908e-05, "loss": 0.9092, "step": 1453 }, { "epoch": 0.2981340988312487, "grad_norm": 1.308222770690918, "learning_rate": 1.6462859001673455e-05, "loss": 0.9587, "step": 1454 }, { "epoch": 0.2983391429157269, "grad_norm": 1.5373995304107666, "learning_rate": 1.6457789215449756e-05, "loss": 0.8894, "step": 1455 }, { "epoch": 0.298544187000205, "grad_norm": 1.291646122932434, "learning_rate": 1.6452716580430303e-05, "loss": 0.9824, "step": 1456 }, { "epoch": 0.2987492310846832, "grad_norm": 1.328204870223999, "learning_rate": 1.644764109885284e-05, "loss": 0.9767, "step": 1457 }, { "epoch": 0.29895427516916134, "grad_norm": 1.2851147651672363, "learning_rate": 1.6442562772956382e-05, "loss": 1.0078, "step": 1458 }, { "epoch": 0.29915931925363953, "grad_norm": 1.3170043230056763, "learning_rate": 1.6437481604981175e-05, "loss": 0.9684, "step": 1459 }, { "epoch": 0.2993643633381177, "grad_norm": 1.3940823078155518, "learning_rate": 1.6432397597168735e-05, "loss": 1.0233, "step": 1460 }, { "epoch": 0.29956940742259586, "grad_norm": 1.2794125080108643, "learning_rate": 1.6427310751761824e-05, "loss": 0.9084, "step": 1461 }, { "epoch": 0.29977445150707405, "grad_norm": 1.2700731754302979, "learning_rate": 1.642222107100446e-05, "loss": 0.9533, "step": 1462 }, { "epoch": 0.2999794955915522, "grad_norm": 1.2433539628982544, "learning_rate": 1.641712855714191e-05, "loss": 0.9705, "step": 1463 }, { "epoch": 0.30018453967603037, "grad_norm": 1.320159912109375, "learning_rate": 1.6412033212420693e-05, "loss": 1.0033, "step": 1464 }, { "epoch": 0.3003895837605085, "grad_norm": 1.2359455823898315, "learning_rate": 1.640693503908857e-05, "loss": 0.9082, "step": 1465 }, { "epoch": 0.3005946278449867, "grad_norm": 1.2547746896743774, "learning_rate": 1.6401834039394556e-05, "loss": 0.9078, "step": 1466 }, { "epoch": 0.30079967192946483, "grad_norm": 1.2557631731033325, "learning_rate": 1.6396730215588913e-05, "loss": 0.9518, "step": 1467 }, { "epoch": 0.301004716013943, "grad_norm": 1.3615800142288208, "learning_rate": 1.6391623569923147e-05, "loss": 0.9661, "step": 1468 }, { "epoch": 0.30120976009842115, "grad_norm": 1.2586873769760132, "learning_rate": 1.6386514104650007e-05, "loss": 0.9786, "step": 1469 }, { "epoch": 0.30141480418289934, "grad_norm": 1.207363486289978, "learning_rate": 1.638140182202349e-05, "loss": 0.9475, "step": 1470 }, { "epoch": 0.3016198482673775, "grad_norm": 1.2145503759384155, "learning_rate": 1.6376286724298835e-05, "loss": 0.8726, "step": 1471 }, { "epoch": 0.30182489235185567, "grad_norm": 1.2878011465072632, "learning_rate": 1.6371168813732514e-05, "loss": 0.9729, "step": 1472 }, { "epoch": 0.3020299364363338, "grad_norm": 1.283449411392212, "learning_rate": 1.6366048092582253e-05, "loss": 0.9061, "step": 1473 }, { "epoch": 0.302234980520812, "grad_norm": 1.2282828092575073, "learning_rate": 1.636092456310701e-05, "loss": 0.8803, "step": 1474 }, { "epoch": 0.3024400246052901, "grad_norm": 1.338555932044983, "learning_rate": 1.6355798227566987e-05, "loss": 0.9509, "step": 1475 }, { "epoch": 0.3026450686897683, "grad_norm": 1.3569486141204834, "learning_rate": 1.635066908822362e-05, "loss": 0.9235, "step": 1476 }, { "epoch": 0.30285011277424645, "grad_norm": 1.3927351236343384, "learning_rate": 1.6345537147339578e-05, "loss": 0.934, "step": 1477 }, { "epoch": 0.30305515685872464, "grad_norm": 1.2590112686157227, "learning_rate": 1.634040240717878e-05, "loss": 0.9789, "step": 1478 }, { "epoch": 0.3032602009432028, "grad_norm": 1.3951443433761597, "learning_rate": 1.6335264870006362e-05, "loss": 0.996, "step": 1479 }, { "epoch": 0.30346524502768096, "grad_norm": 1.4118865728378296, "learning_rate": 1.6330124538088705e-05, "loss": 1.0368, "step": 1480 }, { "epoch": 0.3036702891121591, "grad_norm": 1.196970820426941, "learning_rate": 1.632498141369342e-05, "loss": 0.9744, "step": 1481 }, { "epoch": 0.3038753331966373, "grad_norm": 1.3438489437103271, "learning_rate": 1.6319835499089358e-05, "loss": 0.9894, "step": 1482 }, { "epoch": 0.3040803772811154, "grad_norm": 1.231277585029602, "learning_rate": 1.6314686796546578e-05, "loss": 0.9884, "step": 1483 }, { "epoch": 0.3042854213655936, "grad_norm": 1.272566556930542, "learning_rate": 1.6309535308336394e-05, "loss": 0.9654, "step": 1484 }, { "epoch": 0.30449046545007175, "grad_norm": 1.3233712911605835, "learning_rate": 1.630438103673134e-05, "loss": 0.9108, "step": 1485 }, { "epoch": 0.30469550953454994, "grad_norm": 1.3168528079986572, "learning_rate": 1.6299223984005172e-05, "loss": 0.9752, "step": 1486 }, { "epoch": 0.30490055361902807, "grad_norm": 1.3389508724212646, "learning_rate": 1.6294064152432878e-05, "loss": 0.9393, "step": 1487 }, { "epoch": 0.30510559770350626, "grad_norm": 1.306587815284729, "learning_rate": 1.6288901544290672e-05, "loss": 1.0407, "step": 1488 }, { "epoch": 0.3053106417879844, "grad_norm": 1.3791940212249756, "learning_rate": 1.6283736161855995e-05, "loss": 0.9628, "step": 1489 }, { "epoch": 0.3055156858724626, "grad_norm": 1.317984938621521, "learning_rate": 1.6278568007407506e-05, "loss": 0.9478, "step": 1490 }, { "epoch": 0.3057207299569407, "grad_norm": 1.4068602323532104, "learning_rate": 1.6273397083225088e-05, "loss": 1.0139, "step": 1491 }, { "epoch": 0.3059257740414189, "grad_norm": 1.3915413618087769, "learning_rate": 1.626822339158985e-05, "loss": 0.9258, "step": 1492 }, { "epoch": 0.30613081812589704, "grad_norm": 1.3864842653274536, "learning_rate": 1.6263046934784127e-05, "loss": 0.9542, "step": 1493 }, { "epoch": 0.30633586221037523, "grad_norm": 1.3779350519180298, "learning_rate": 1.6257867715091454e-05, "loss": 0.9929, "step": 1494 }, { "epoch": 0.30654090629485337, "grad_norm": 1.2689208984375, "learning_rate": 1.6252685734796603e-05, "loss": 0.9083, "step": 1495 }, { "epoch": 0.30674595037933156, "grad_norm": 1.2963848114013672, "learning_rate": 1.624750099618556e-05, "loss": 0.9313, "step": 1496 }, { "epoch": 0.30695099446380975, "grad_norm": 1.308586835861206, "learning_rate": 1.6242313501545522e-05, "loss": 0.8854, "step": 1497 }, { "epoch": 0.3071560385482879, "grad_norm": 1.3577755689620972, "learning_rate": 1.6237123253164907e-05, "loss": 0.9424, "step": 1498 }, { "epoch": 0.30736108263276607, "grad_norm": 1.184312105178833, "learning_rate": 1.6231930253333346e-05, "loss": 1.0295, "step": 1499 }, { "epoch": 0.3075661267172442, "grad_norm": 1.3162592649459839, "learning_rate": 1.622673450434169e-05, "loss": 0.8926, "step": 1500 }, { "epoch": 0.3077711708017224, "grad_norm": 1.27334463596344, "learning_rate": 1.6221536008481987e-05, "loss": 1.0256, "step": 1501 }, { "epoch": 0.30797621488620053, "grad_norm": 1.4826436042785645, "learning_rate": 1.621633476804752e-05, "loss": 1.0466, "step": 1502 }, { "epoch": 0.3081812589706787, "grad_norm": 1.2657030820846558, "learning_rate": 1.6211130785332757e-05, "loss": 0.9171, "step": 1503 }, { "epoch": 0.30838630305515685, "grad_norm": 1.3196693658828735, "learning_rate": 1.620592406263339e-05, "loss": 0.9158, "step": 1504 }, { "epoch": 0.30859134713963504, "grad_norm": 1.3942404985427856, "learning_rate": 1.6200714602246325e-05, "loss": 1.0693, "step": 1505 }, { "epoch": 0.3087963912241132, "grad_norm": 1.2415913343429565, "learning_rate": 1.6195502406469664e-05, "loss": 0.9449, "step": 1506 }, { "epoch": 0.30900143530859137, "grad_norm": 1.3141828775405884, "learning_rate": 1.6190287477602716e-05, "loss": 0.9066, "step": 1507 }, { "epoch": 0.3092064793930695, "grad_norm": 1.3301750421524048, "learning_rate": 1.6185069817946012e-05, "loss": 0.9812, "step": 1508 }, { "epoch": 0.3094115234775477, "grad_norm": 1.3136850595474243, "learning_rate": 1.6179849429801263e-05, "loss": 1.0332, "step": 1509 }, { "epoch": 0.3096165675620258, "grad_norm": 1.4320350885391235, "learning_rate": 1.6174626315471408e-05, "loss": 0.9827, "step": 1510 }, { "epoch": 0.309821611646504, "grad_norm": 1.2771052122116089, "learning_rate": 1.6169400477260566e-05, "loss": 1.0014, "step": 1511 }, { "epoch": 0.31002665573098215, "grad_norm": 1.3690335750579834, "learning_rate": 1.6164171917474078e-05, "loss": 1.0205, "step": 1512 }, { "epoch": 0.31023169981546034, "grad_norm": 1.3522071838378906, "learning_rate": 1.615894063841847e-05, "loss": 1.0254, "step": 1513 }, { "epoch": 0.31043674389993847, "grad_norm": 1.3598700761795044, "learning_rate": 1.6153706642401477e-05, "loss": 0.977, "step": 1514 }, { "epoch": 0.31064178798441666, "grad_norm": 1.3602869510650635, "learning_rate": 1.6148469931732025e-05, "loss": 0.9366, "step": 1515 }, { "epoch": 0.3108468320688948, "grad_norm": 1.1992120742797852, "learning_rate": 1.614323050872025e-05, "loss": 0.9903, "step": 1516 }, { "epoch": 0.311051876153373, "grad_norm": 1.3629703521728516, "learning_rate": 1.6137988375677466e-05, "loss": 0.8808, "step": 1517 }, { "epoch": 0.3112569202378511, "grad_norm": 1.3688325881958008, "learning_rate": 1.6132743534916204e-05, "loss": 1.0335, "step": 1518 }, { "epoch": 0.3114619643223293, "grad_norm": 1.2468962669372559, "learning_rate": 1.612749598875017e-05, "loss": 1.0215, "step": 1519 }, { "epoch": 0.31166700840680744, "grad_norm": 1.465788722038269, "learning_rate": 1.6122245739494277e-05, "loss": 0.9186, "step": 1520 }, { "epoch": 0.31187205249128563, "grad_norm": 1.2165857553482056, "learning_rate": 1.6116992789464617e-05, "loss": 0.8576, "step": 1521 }, { "epoch": 0.31207709657576377, "grad_norm": 1.2193880081176758, "learning_rate": 1.6111737140978495e-05, "loss": 0.9754, "step": 1522 }, { "epoch": 0.31228214066024196, "grad_norm": 1.3210331201553345, "learning_rate": 1.6106478796354382e-05, "loss": 0.8664, "step": 1523 }, { "epoch": 0.3124871847447201, "grad_norm": 1.3013793230056763, "learning_rate": 1.6101217757911956e-05, "loss": 0.9907, "step": 1524 }, { "epoch": 0.3126922288291983, "grad_norm": 1.3801352977752686, "learning_rate": 1.6095954027972074e-05, "loss": 1.0272, "step": 1525 }, { "epoch": 0.3128972729136764, "grad_norm": 1.23331618309021, "learning_rate": 1.6090687608856782e-05, "loss": 0.9351, "step": 1526 }, { "epoch": 0.3131023169981546, "grad_norm": 1.2690703868865967, "learning_rate": 1.6085418502889315e-05, "loss": 1.0048, "step": 1527 }, { "epoch": 0.31330736108263274, "grad_norm": 1.239539623260498, "learning_rate": 1.6080146712394097e-05, "loss": 0.9134, "step": 1528 }, { "epoch": 0.31351240516711093, "grad_norm": 1.3612213134765625, "learning_rate": 1.607487223969672e-05, "loss": 1.051, "step": 1529 }, { "epoch": 0.31371744925158906, "grad_norm": 1.3606823682785034, "learning_rate": 1.6069595087123982e-05, "loss": 0.9495, "step": 1530 }, { "epoch": 0.31392249333606725, "grad_norm": 1.264880657196045, "learning_rate": 1.606431525700384e-05, "loss": 0.9852, "step": 1531 }, { "epoch": 0.31412753742054544, "grad_norm": 1.294826865196228, "learning_rate": 1.6059032751665454e-05, "loss": 0.9535, "step": 1532 }, { "epoch": 0.3143325815050236, "grad_norm": 1.293549656867981, "learning_rate": 1.6053747573439147e-05, "loss": 0.9765, "step": 1533 }, { "epoch": 0.31453762558950177, "grad_norm": 1.2542610168457031, "learning_rate": 1.6048459724656434e-05, "loss": 1.0144, "step": 1534 }, { "epoch": 0.3147426696739799, "grad_norm": 1.384893774986267, "learning_rate": 1.6043169207649993e-05, "loss": 1.0674, "step": 1535 }, { "epoch": 0.3149477137584581, "grad_norm": 1.3235433101654053, "learning_rate": 1.6037876024753696e-05, "loss": 0.9746, "step": 1536 }, { "epoch": 0.3151527578429362, "grad_norm": 1.3433263301849365, "learning_rate": 1.6032580178302585e-05, "loss": 1.0145, "step": 1537 }, { "epoch": 0.3153578019274144, "grad_norm": 1.2119313478469849, "learning_rate": 1.6027281670632864e-05, "loss": 0.9286, "step": 1538 }, { "epoch": 0.31556284601189255, "grad_norm": 1.2404491901397705, "learning_rate": 1.6021980504081933e-05, "loss": 0.9575, "step": 1539 }, { "epoch": 0.31576789009637074, "grad_norm": 1.2445932626724243, "learning_rate": 1.6016676680988346e-05, "loss": 1.052, "step": 1540 }, { "epoch": 0.3159729341808489, "grad_norm": 1.2252799272537231, "learning_rate": 1.6011370203691846e-05, "loss": 1.0041, "step": 1541 }, { "epoch": 0.31617797826532706, "grad_norm": 1.2804720401763916, "learning_rate": 1.600606107453333e-05, "loss": 0.957, "step": 1542 }, { "epoch": 0.3163830223498052, "grad_norm": 1.2850855588912964, "learning_rate": 1.6000749295854875e-05, "loss": 0.8784, "step": 1543 }, { "epoch": 0.3165880664342834, "grad_norm": 1.3047643899917603, "learning_rate": 1.5995434869999723e-05, "loss": 0.993, "step": 1544 }, { "epoch": 0.3167931105187615, "grad_norm": 1.3335940837860107, "learning_rate": 1.599011779931229e-05, "loss": 1.0268, "step": 1545 }, { "epoch": 0.3169981546032397, "grad_norm": 1.4568735361099243, "learning_rate": 1.598479808613815e-05, "loss": 0.9783, "step": 1546 }, { "epoch": 0.31720319868771785, "grad_norm": 1.3083984851837158, "learning_rate": 1.597947573282405e-05, "loss": 0.999, "step": 1547 }, { "epoch": 0.31740824277219604, "grad_norm": 1.3678699731826782, "learning_rate": 1.5974150741717892e-05, "loss": 0.905, "step": 1548 }, { "epoch": 0.31761328685667417, "grad_norm": 1.2402360439300537, "learning_rate": 1.5968823115168754e-05, "loss": 1.0225, "step": 1549 }, { "epoch": 0.31781833094115236, "grad_norm": 1.2166987657546997, "learning_rate": 1.596349285552687e-05, "loss": 0.9668, "step": 1550 }, { "epoch": 0.3180233750256305, "grad_norm": 1.320797085762024, "learning_rate": 1.5958159965143635e-05, "loss": 0.8772, "step": 1551 }, { "epoch": 0.3182284191101087, "grad_norm": 1.2734493017196655, "learning_rate": 1.5952824446371608e-05, "loss": 0.9791, "step": 1552 }, { "epoch": 0.3184334631945868, "grad_norm": 1.2140427827835083, "learning_rate": 1.59474863015645e-05, "loss": 0.9229, "step": 1553 }, { "epoch": 0.318638507279065, "grad_norm": 1.3714488744735718, "learning_rate": 1.5942145533077188e-05, "loss": 0.9393, "step": 1554 }, { "epoch": 0.31884355136354314, "grad_norm": 1.2102304697036743, "learning_rate": 1.5936802143265708e-05, "loss": 0.8838, "step": 1555 }, { "epoch": 0.31904859544802133, "grad_norm": 1.2767683267593384, "learning_rate": 1.593145613448724e-05, "loss": 0.9393, "step": 1556 }, { "epoch": 0.31925363953249947, "grad_norm": 1.263932228088379, "learning_rate": 1.592610750910014e-05, "loss": 1.0003, "step": 1557 }, { "epoch": 0.31945868361697766, "grad_norm": 1.30350923538208, "learning_rate": 1.5920756269463896e-05, "loss": 1.0133, "step": 1558 }, { "epoch": 0.3196637277014558, "grad_norm": 1.2309134006500244, "learning_rate": 1.591540241793916e-05, "loss": 0.9402, "step": 1559 }, { "epoch": 0.319868771785934, "grad_norm": 1.3097130060195923, "learning_rate": 1.5910045956887742e-05, "loss": 0.9826, "step": 1560 }, { "epoch": 0.3200738158704121, "grad_norm": 1.381818413734436, "learning_rate": 1.590468688867259e-05, "loss": 1.005, "step": 1561 }, { "epoch": 0.3202788599548903, "grad_norm": 1.3814873695373535, "learning_rate": 1.589932521565781e-05, "loss": 0.9374, "step": 1562 }, { "epoch": 0.32048390403936844, "grad_norm": 1.4009571075439453, "learning_rate": 1.589396094020866e-05, "loss": 0.9434, "step": 1563 }, { "epoch": 0.32068894812384663, "grad_norm": 1.3868308067321777, "learning_rate": 1.5888594064691544e-05, "loss": 0.9852, "step": 1564 }, { "epoch": 0.32089399220832476, "grad_norm": 1.3260116577148438, "learning_rate": 1.5883224591474e-05, "loss": 0.9824, "step": 1565 }, { "epoch": 0.32109903629280295, "grad_norm": 1.2013111114501953, "learning_rate": 1.5877852522924733e-05, "loss": 0.9611, "step": 1566 }, { "epoch": 0.32130408037728114, "grad_norm": 1.358737587928772, "learning_rate": 1.587247786141358e-05, "loss": 0.926, "step": 1567 }, { "epoch": 0.3215091244617593, "grad_norm": 1.3592145442962646, "learning_rate": 1.586710060931152e-05, "loss": 0.9717, "step": 1568 }, { "epoch": 0.32171416854623747, "grad_norm": 1.440758466720581, "learning_rate": 1.5861720768990685e-05, "loss": 1.0326, "step": 1569 }, { "epoch": 0.3219192126307156, "grad_norm": 1.3760524988174438, "learning_rate": 1.585633834282434e-05, "loss": 1.0191, "step": 1570 }, { "epoch": 0.3221242567151938, "grad_norm": 1.2799186706542969, "learning_rate": 1.58509533331869e-05, "loss": 0.9844, "step": 1571 }, { "epoch": 0.3223293007996719, "grad_norm": 1.230986475944519, "learning_rate": 1.5845565742453906e-05, "loss": 0.9616, "step": 1572 }, { "epoch": 0.3225343448841501, "grad_norm": 1.2677353620529175, "learning_rate": 1.584017557300205e-05, "loss": 0.9495, "step": 1573 }, { "epoch": 0.32273938896862825, "grad_norm": 1.338418960571289, "learning_rate": 1.5834782827209158e-05, "loss": 0.9687, "step": 1574 }, { "epoch": 0.32294443305310644, "grad_norm": 1.3382022380828857, "learning_rate": 1.5829387507454185e-05, "loss": 1.0106, "step": 1575 }, { "epoch": 0.3231494771375846, "grad_norm": 1.3802021741867065, "learning_rate": 1.5823989616117234e-05, "loss": 0.96, "step": 1576 }, { "epoch": 0.32335452122206276, "grad_norm": 1.2973979711532593, "learning_rate": 1.581858915557953e-05, "loss": 0.9918, "step": 1577 }, { "epoch": 0.3235595653065409, "grad_norm": 1.3092931509017944, "learning_rate": 1.5813186128223446e-05, "loss": 0.915, "step": 1578 }, { "epoch": 0.3237646093910191, "grad_norm": 1.3290965557098389, "learning_rate": 1.5807780536432474e-05, "loss": 0.9538, "step": 1579 }, { "epoch": 0.3239696534754972, "grad_norm": 1.2837507724761963, "learning_rate": 1.580237238259124e-05, "loss": 0.9366, "step": 1580 }, { "epoch": 0.3241746975599754, "grad_norm": 1.2971149682998657, "learning_rate": 1.579696166908551e-05, "loss": 0.9675, "step": 1581 }, { "epoch": 0.32437974164445355, "grad_norm": 1.3537466526031494, "learning_rate": 1.5791548398302167e-05, "loss": 0.9395, "step": 1582 }, { "epoch": 0.32458478572893174, "grad_norm": 1.28249990940094, "learning_rate": 1.5786132572629226e-05, "loss": 0.9355, "step": 1583 }, { "epoch": 0.32478982981340987, "grad_norm": 1.4390267133712769, "learning_rate": 1.578071419445583e-05, "loss": 0.9464, "step": 1584 }, { "epoch": 0.32499487389788806, "grad_norm": 1.356756567955017, "learning_rate": 1.5775293266172252e-05, "loss": 0.9919, "step": 1585 }, { "epoch": 0.3251999179823662, "grad_norm": 1.2981164455413818, "learning_rate": 1.5769869790169882e-05, "loss": 0.9971, "step": 1586 }, { "epoch": 0.3254049620668444, "grad_norm": 1.3312709331512451, "learning_rate": 1.5764443768841234e-05, "loss": 0.865, "step": 1587 }, { "epoch": 0.3256100061513225, "grad_norm": 1.3377991914749146, "learning_rate": 1.5759015204579958e-05, "loss": 0.9145, "step": 1588 }, { "epoch": 0.3258150502358007, "grad_norm": 1.3000600337982178, "learning_rate": 1.575358409978081e-05, "loss": 0.9858, "step": 1589 }, { "epoch": 0.32602009432027884, "grad_norm": 1.4889805316925049, "learning_rate": 1.574815045683968e-05, "loss": 0.992, "step": 1590 }, { "epoch": 0.32622513840475703, "grad_norm": 1.2491977214813232, "learning_rate": 1.574271427815356e-05, "loss": 0.9725, "step": 1591 }, { "epoch": 0.32643018248923517, "grad_norm": 1.189635157585144, "learning_rate": 1.5737275566120577e-05, "loss": 1.0096, "step": 1592 }, { "epoch": 0.32663522657371336, "grad_norm": 1.3107012510299683, "learning_rate": 1.5731834323139973e-05, "loss": 0.922, "step": 1593 }, { "epoch": 0.3268402706581915, "grad_norm": 1.3442434072494507, "learning_rate": 1.5726390551612096e-05, "loss": 1.0004, "step": 1594 }, { "epoch": 0.3270453147426697, "grad_norm": 1.3980573415756226, "learning_rate": 1.5720944253938425e-05, "loss": 0.9401, "step": 1595 }, { "epoch": 0.3272503588271478, "grad_norm": 1.277538776397705, "learning_rate": 1.571549543252154e-05, "loss": 1.0531, "step": 1596 }, { "epoch": 0.327455402911626, "grad_norm": 1.2173094749450684, "learning_rate": 1.5710044089765144e-05, "loss": 0.9258, "step": 1597 }, { "epoch": 0.32766044699610414, "grad_norm": 1.437086582183838, "learning_rate": 1.5704590228074044e-05, "loss": 0.931, "step": 1598 }, { "epoch": 0.3278654910805823, "grad_norm": 1.3713173866271973, "learning_rate": 1.5699133849854164e-05, "loss": 0.8686, "step": 1599 }, { "epoch": 0.32807053516506046, "grad_norm": 1.2275571823120117, "learning_rate": 1.5693674957512536e-05, "loss": 0.9518, "step": 1600 }, { "epoch": 0.32827557924953865, "grad_norm": 1.2314965724945068, "learning_rate": 1.56882135534573e-05, "loss": 0.9365, "step": 1601 }, { "epoch": 0.32848062333401684, "grad_norm": 1.2712723016738892, "learning_rate": 1.5682749640097708e-05, "loss": 0.998, "step": 1602 }, { "epoch": 0.328685667418495, "grad_norm": 1.4353537559509277, "learning_rate": 1.5677283219844115e-05, "loss": 1.0299, "step": 1603 }, { "epoch": 0.32889071150297317, "grad_norm": 1.2576978206634521, "learning_rate": 1.5671814295107983e-05, "loss": 0.8844, "step": 1604 }, { "epoch": 0.3290957555874513, "grad_norm": 1.4705040454864502, "learning_rate": 1.5666342868301878e-05, "loss": 1.0234, "step": 1605 }, { "epoch": 0.3293007996719295, "grad_norm": 1.3410818576812744, "learning_rate": 1.566086894183947e-05, "loss": 0.9648, "step": 1606 }, { "epoch": 0.3295058437564076, "grad_norm": 1.3032735586166382, "learning_rate": 1.565539251813554e-05, "loss": 1.0148, "step": 1607 }, { "epoch": 0.3297108878408858, "grad_norm": 1.3216034173965454, "learning_rate": 1.5649913599605956e-05, "loss": 0.9892, "step": 1608 }, { "epoch": 0.32991593192536395, "grad_norm": 1.213852882385254, "learning_rate": 1.5644432188667695e-05, "loss": 0.9467, "step": 1609 }, { "epoch": 0.33012097600984214, "grad_norm": 1.3953238725662231, "learning_rate": 1.563894828773883e-05, "loss": 0.9693, "step": 1610 }, { "epoch": 0.33032602009432027, "grad_norm": 1.3565595149993896, "learning_rate": 1.563346189923854e-05, "loss": 0.9925, "step": 1611 }, { "epoch": 0.33053106417879846, "grad_norm": 1.350000023841858, "learning_rate": 1.5627973025587093e-05, "loss": 0.9599, "step": 1612 }, { "epoch": 0.3307361082632766, "grad_norm": 1.4594883918762207, "learning_rate": 1.5622481669205857e-05, "loss": 0.9924, "step": 1613 }, { "epoch": 0.3309411523477548, "grad_norm": 1.3146092891693115, "learning_rate": 1.5616987832517297e-05, "loss": 1.0138, "step": 1614 }, { "epoch": 0.3311461964322329, "grad_norm": 1.2024871110916138, "learning_rate": 1.5611491517944963e-05, "loss": 0.9598, "step": 1615 }, { "epoch": 0.3313512405167111, "grad_norm": 1.214239239692688, "learning_rate": 1.5605992727913512e-05, "loss": 0.9139, "step": 1616 }, { "epoch": 0.33155628460118924, "grad_norm": 1.2205051183700562, "learning_rate": 1.560049146484868e-05, "loss": 0.927, "step": 1617 }, { "epoch": 0.33176132868566743, "grad_norm": 1.2977254390716553, "learning_rate": 1.5594987731177305e-05, "loss": 0.9259, "step": 1618 }, { "epoch": 0.33196637277014557, "grad_norm": 1.2063957452774048, "learning_rate": 1.5589481529327306e-05, "loss": 1.0227, "step": 1619 }, { "epoch": 0.33217141685462376, "grad_norm": 1.2079346179962158, "learning_rate": 1.5583972861727697e-05, "loss": 0.9297, "step": 1620 }, { "epoch": 0.3323764609391019, "grad_norm": 1.2258721590042114, "learning_rate": 1.5578461730808575e-05, "loss": 0.9004, "step": 1621 }, { "epoch": 0.3325815050235801, "grad_norm": 1.2998933792114258, "learning_rate": 1.5572948139001128e-05, "loss": 0.7987, "step": 1622 }, { "epoch": 0.3327865491080582, "grad_norm": 1.2398930788040161, "learning_rate": 1.5567432088737625e-05, "loss": 0.9284, "step": 1623 }, { "epoch": 0.3329915931925364, "grad_norm": 1.2818493843078613, "learning_rate": 1.5561913582451428e-05, "loss": 0.9734, "step": 1624 }, { "epoch": 0.33319663727701454, "grad_norm": 1.3398280143737793, "learning_rate": 1.555639262257697e-05, "loss": 0.9327, "step": 1625 }, { "epoch": 0.33340168136149273, "grad_norm": 1.6210393905639648, "learning_rate": 1.555086921154977e-05, "loss": 0.9513, "step": 1626 }, { "epoch": 0.33360672544597086, "grad_norm": 1.3669112920761108, "learning_rate": 1.5545343351806443e-05, "loss": 1.0206, "step": 1627 }, { "epoch": 0.33381176953044905, "grad_norm": 1.3541561365127563, "learning_rate": 1.5539815045784663e-05, "loss": 0.9982, "step": 1628 }, { "epoch": 0.3340168136149272, "grad_norm": 1.2373918294906616, "learning_rate": 1.5534284295923196e-05, "loss": 1.0227, "step": 1629 }, { "epoch": 0.3342218576994054, "grad_norm": 1.214217185974121, "learning_rate": 1.552875110466188e-05, "loss": 0.9456, "step": 1630 }, { "epoch": 0.3344269017838835, "grad_norm": 1.151260495185852, "learning_rate": 1.552321547444164e-05, "loss": 0.9604, "step": 1631 }, { "epoch": 0.3346319458683617, "grad_norm": 1.4250744581222534, "learning_rate": 1.551767740770446e-05, "loss": 1.0001, "step": 1632 }, { "epoch": 0.33483698995283984, "grad_norm": 1.3337371349334717, "learning_rate": 1.5512136906893415e-05, "loss": 0.906, "step": 1633 }, { "epoch": 0.335042034037318, "grad_norm": 1.2857812643051147, "learning_rate": 1.5506593974452645e-05, "loss": 1.0042, "step": 1634 }, { "epoch": 0.33524707812179616, "grad_norm": 1.3003190755844116, "learning_rate": 1.5501048612827362e-05, "loss": 0.9672, "step": 1635 }, { "epoch": 0.33545212220627435, "grad_norm": 1.2497392892837524, "learning_rate": 1.5495500824463857e-05, "loss": 0.9732, "step": 1636 }, { "epoch": 0.3356571662907525, "grad_norm": 1.3444255590438843, "learning_rate": 1.5489950611809484e-05, "loss": 1.0084, "step": 1637 }, { "epoch": 0.3358622103752307, "grad_norm": 1.3845086097717285, "learning_rate": 1.5484397977312675e-05, "loss": 1.0347, "step": 1638 }, { "epoch": 0.33606725445970886, "grad_norm": 1.4903571605682373, "learning_rate": 1.5478842923422915e-05, "loss": 1.0574, "step": 1639 }, { "epoch": 0.336272298544187, "grad_norm": 1.2782459259033203, "learning_rate": 1.547328545259078e-05, "loss": 0.9445, "step": 1640 }, { "epoch": 0.3364773426286652, "grad_norm": 1.3454606533050537, "learning_rate": 1.546772556726788e-05, "loss": 1.0548, "step": 1641 }, { "epoch": 0.3366823867131433, "grad_norm": 1.3575770854949951, "learning_rate": 1.5462163269906928e-05, "loss": 0.8835, "step": 1642 }, { "epoch": 0.3368874307976215, "grad_norm": 1.3858258724212646, "learning_rate": 1.5456598562961666e-05, "loss": 0.9926, "step": 1643 }, { "epoch": 0.33709247488209965, "grad_norm": 1.2901602983474731, "learning_rate": 1.5451031448886923e-05, "loss": 0.9902, "step": 1644 }, { "epoch": 0.33729751896657784, "grad_norm": 1.2505993843078613, "learning_rate": 1.5445461930138582e-05, "loss": 1.0217, "step": 1645 }, { "epoch": 0.33750256305105597, "grad_norm": 1.3005443811416626, "learning_rate": 1.543989000917358e-05, "loss": 0.9827, "step": 1646 }, { "epoch": 0.33770760713553416, "grad_norm": 1.3172693252563477, "learning_rate": 1.5434315688449924e-05, "loss": 1.0414, "step": 1647 }, { "epoch": 0.3379126512200123, "grad_norm": 1.2493771314620972, "learning_rate": 1.542873897042668e-05, "loss": 0.9462, "step": 1648 }, { "epoch": 0.3381176953044905, "grad_norm": 1.1764830350875854, "learning_rate": 1.5423159857563955e-05, "loss": 0.8682, "step": 1649 }, { "epoch": 0.3383227393889686, "grad_norm": 1.1660852432250977, "learning_rate": 1.5417578352322934e-05, "loss": 1.0359, "step": 1650 }, { "epoch": 0.3385277834734468, "grad_norm": 1.3399044275283813, "learning_rate": 1.5411994457165848e-05, "loss": 0.9762, "step": 1651 }, { "epoch": 0.33873282755792494, "grad_norm": 1.218885898590088, "learning_rate": 1.5406408174555978e-05, "loss": 0.8979, "step": 1652 }, { "epoch": 0.33893787164240313, "grad_norm": 1.3966624736785889, "learning_rate": 1.5400819506957664e-05, "loss": 1.0267, "step": 1653 }, { "epoch": 0.33914291572688127, "grad_norm": 1.3363350629806519, "learning_rate": 1.5395228456836298e-05, "loss": 0.9518, "step": 1654 }, { "epoch": 0.33934795981135946, "grad_norm": 1.2625999450683594, "learning_rate": 1.5389635026658317e-05, "loss": 0.9543, "step": 1655 }, { "epoch": 0.3395530038958376, "grad_norm": 1.353677749633789, "learning_rate": 1.5384039218891224e-05, "loss": 0.9187, "step": 1656 }, { "epoch": 0.3397580479803158, "grad_norm": 1.2661495208740234, "learning_rate": 1.5378441036003543e-05, "loss": 0.9302, "step": 1657 }, { "epoch": 0.3399630920647939, "grad_norm": 1.2321685552597046, "learning_rate": 1.5372840480464876e-05, "loss": 0.9057, "step": 1658 }, { "epoch": 0.3401681361492721, "grad_norm": 1.2402894496917725, "learning_rate": 1.5367237554745847e-05, "loss": 0.9046, "step": 1659 }, { "epoch": 0.34037318023375024, "grad_norm": 1.2151148319244385, "learning_rate": 1.5361632261318144e-05, "loss": 0.9473, "step": 1660 }, { "epoch": 0.34057822431822843, "grad_norm": 1.3960984945297241, "learning_rate": 1.5356024602654488e-05, "loss": 0.8405, "step": 1661 }, { "epoch": 0.34078326840270656, "grad_norm": 1.3611626625061035, "learning_rate": 1.535041458122865e-05, "loss": 0.9716, "step": 1662 }, { "epoch": 0.34098831248718475, "grad_norm": 1.2889282703399658, "learning_rate": 1.5344802199515444e-05, "loss": 1.0043, "step": 1663 }, { "epoch": 0.3411933565716629, "grad_norm": 1.3320121765136719, "learning_rate": 1.533918745999071e-05, "loss": 1.0494, "step": 1664 }, { "epoch": 0.3413984006561411, "grad_norm": 1.428362250328064, "learning_rate": 1.5333570365131353e-05, "loss": 0.9484, "step": 1665 }, { "epoch": 0.3416034447406192, "grad_norm": 1.3442074060440063, "learning_rate": 1.5327950917415296e-05, "loss": 0.866, "step": 1666 }, { "epoch": 0.3418084888250974, "grad_norm": 1.3291518688201904, "learning_rate": 1.5322329119321508e-05, "loss": 0.9258, "step": 1667 }, { "epoch": 0.34201353290957553, "grad_norm": 1.4111655950546265, "learning_rate": 1.5316704973330004e-05, "loss": 1.0476, "step": 1668 }, { "epoch": 0.3422185769940537, "grad_norm": 1.3606595993041992, "learning_rate": 1.531107848192181e-05, "loss": 0.9944, "step": 1669 }, { "epoch": 0.34242362107853186, "grad_norm": 1.2585041522979736, "learning_rate": 1.5305449647579018e-05, "loss": 0.8709, "step": 1670 }, { "epoch": 0.34262866516301005, "grad_norm": 1.3438533544540405, "learning_rate": 1.5299818472784722e-05, "loss": 0.9637, "step": 1671 }, { "epoch": 0.3428337092474882, "grad_norm": 1.2181148529052734, "learning_rate": 1.529418496002308e-05, "loss": 0.9641, "step": 1672 }, { "epoch": 0.3430387533319664, "grad_norm": 1.2498605251312256, "learning_rate": 1.528854911177925e-05, "loss": 0.8642, "step": 1673 }, { "epoch": 0.34324379741644456, "grad_norm": 1.212013840675354, "learning_rate": 1.5282910930539455e-05, "loss": 0.9533, "step": 1674 }, { "epoch": 0.3434488415009227, "grad_norm": 1.274147391319275, "learning_rate": 1.527727041879091e-05, "loss": 0.9844, "step": 1675 }, { "epoch": 0.3436538855854009, "grad_norm": 1.183814287185669, "learning_rate": 1.5271627579021885e-05, "loss": 0.9902, "step": 1676 }, { "epoch": 0.343858929669879, "grad_norm": 1.4847970008850098, "learning_rate": 1.5265982413721662e-05, "loss": 0.9163, "step": 1677 }, { "epoch": 0.3440639737543572, "grad_norm": 1.4484021663665771, "learning_rate": 1.5260334925380564e-05, "loss": 1.004, "step": 1678 }, { "epoch": 0.34426901783883535, "grad_norm": 1.3607311248779297, "learning_rate": 1.5254685116489926e-05, "loss": 0.9236, "step": 1679 }, { "epoch": 0.34447406192331353, "grad_norm": 1.2598122358322144, "learning_rate": 1.5249032989542105e-05, "loss": 0.953, "step": 1680 }, { "epoch": 0.34467910600779167, "grad_norm": 1.3686121702194214, "learning_rate": 1.5243378547030496e-05, "loss": 0.9806, "step": 1681 }, { "epoch": 0.34488415009226986, "grad_norm": 1.275818109512329, "learning_rate": 1.5237721791449497e-05, "loss": 1.0183, "step": 1682 }, { "epoch": 0.345089194176748, "grad_norm": 1.2066597938537598, "learning_rate": 1.523206272529454e-05, "loss": 1.0041, "step": 1683 }, { "epoch": 0.3452942382612262, "grad_norm": 1.229986310005188, "learning_rate": 1.5226401351062073e-05, "loss": 0.9414, "step": 1684 }, { "epoch": 0.3454992823457043, "grad_norm": 1.2908591032028198, "learning_rate": 1.5220737671249551e-05, "loss": 0.8809, "step": 1685 }, { "epoch": 0.3457043264301825, "grad_norm": 1.284098505973816, "learning_rate": 1.5215071688355463e-05, "loss": 0.9379, "step": 1686 }, { "epoch": 0.34590937051466064, "grad_norm": 1.3017412424087524, "learning_rate": 1.5209403404879305e-05, "loss": 1.0174, "step": 1687 }, { "epoch": 0.34611441459913883, "grad_norm": 1.2454102039337158, "learning_rate": 1.5203732823321589e-05, "loss": 0.9616, "step": 1688 }, { "epoch": 0.34631945868361697, "grad_norm": 1.3797448873519897, "learning_rate": 1.5198059946183841e-05, "loss": 0.9953, "step": 1689 }, { "epoch": 0.34652450276809516, "grad_norm": 1.1677788496017456, "learning_rate": 1.5192384775968602e-05, "loss": 0.9511, "step": 1690 }, { "epoch": 0.3467295468525733, "grad_norm": 1.3742173910140991, "learning_rate": 1.518670731517942e-05, "loss": 0.9218, "step": 1691 }, { "epoch": 0.3469345909370515, "grad_norm": 1.1840389966964722, "learning_rate": 1.5181027566320858e-05, "loss": 0.9465, "step": 1692 }, { "epoch": 0.3471396350215296, "grad_norm": 1.227171540260315, "learning_rate": 1.5175345531898483e-05, "loss": 0.9493, "step": 1693 }, { "epoch": 0.3473446791060078, "grad_norm": 1.3767919540405273, "learning_rate": 1.5169661214418875e-05, "loss": 0.9901, "step": 1694 }, { "epoch": 0.34754972319048594, "grad_norm": 1.3611493110656738, "learning_rate": 1.5163974616389621e-05, "loss": 1.007, "step": 1695 }, { "epoch": 0.3477547672749641, "grad_norm": 1.2897323369979858, "learning_rate": 1.5158285740319309e-05, "loss": 1.0158, "step": 1696 }, { "epoch": 0.34795981135944226, "grad_norm": 1.2228927612304688, "learning_rate": 1.5152594588717544e-05, "loss": 0.9842, "step": 1697 }, { "epoch": 0.34816485544392045, "grad_norm": 1.3319063186645508, "learning_rate": 1.5146901164094914e-05, "loss": 0.9552, "step": 1698 }, { "epoch": 0.3483698995283986, "grad_norm": 1.3360121250152588, "learning_rate": 1.5141205468963034e-05, "loss": 0.9912, "step": 1699 }, { "epoch": 0.3485749436128768, "grad_norm": 1.4254752397537231, "learning_rate": 1.5135507505834502e-05, "loss": 0.984, "step": 1700 }, { "epoch": 0.3487799876973549, "grad_norm": 1.3500072956085205, "learning_rate": 1.5129807277222926e-05, "loss": 0.9612, "step": 1701 }, { "epoch": 0.3489850317818331, "grad_norm": 1.2729791402816772, "learning_rate": 1.5124104785642909e-05, "loss": 0.9674, "step": 1702 }, { "epoch": 0.34919007586631123, "grad_norm": 1.2112038135528564, "learning_rate": 1.5118400033610055e-05, "loss": 0.9078, "step": 1703 }, { "epoch": 0.3493951199507894, "grad_norm": 1.1921589374542236, "learning_rate": 1.5112693023640962e-05, "loss": 0.9064, "step": 1704 }, { "epoch": 0.34960016403526756, "grad_norm": 1.25468111038208, "learning_rate": 1.5106983758253227e-05, "loss": 0.973, "step": 1705 }, { "epoch": 0.34980520811974575, "grad_norm": 1.34256911277771, "learning_rate": 1.5101272239965446e-05, "loss": 0.9119, "step": 1706 }, { "epoch": 0.3500102522042239, "grad_norm": 1.3049054145812988, "learning_rate": 1.5095558471297196e-05, "loss": 0.9749, "step": 1707 }, { "epoch": 0.35021529628870207, "grad_norm": 1.285056710243225, "learning_rate": 1.5089842454769064e-05, "loss": 0.9345, "step": 1708 }, { "epoch": 0.35042034037318026, "grad_norm": 1.2933499813079834, "learning_rate": 1.5084124192902612e-05, "loss": 0.8912, "step": 1709 }, { "epoch": 0.3506253844576584, "grad_norm": 1.2779730558395386, "learning_rate": 1.5078403688220402e-05, "loss": 0.9995, "step": 1710 }, { "epoch": 0.3508304285421366, "grad_norm": 1.2835344076156616, "learning_rate": 1.5072680943245982e-05, "loss": 0.8409, "step": 1711 }, { "epoch": 0.3510354726266147, "grad_norm": 1.2644355297088623, "learning_rate": 1.5066955960503893e-05, "loss": 0.9247, "step": 1712 }, { "epoch": 0.3512405167110929, "grad_norm": 1.3070714473724365, "learning_rate": 1.506122874251966e-05, "loss": 0.9675, "step": 1713 }, { "epoch": 0.35144556079557104, "grad_norm": 1.2782716751098633, "learning_rate": 1.5055499291819788e-05, "loss": 1.0021, "step": 1714 }, { "epoch": 0.35165060488004923, "grad_norm": 1.385053277015686, "learning_rate": 1.5049767610931777e-05, "loss": 1.0311, "step": 1715 }, { "epoch": 0.35185564896452737, "grad_norm": 1.2235761880874634, "learning_rate": 1.5044033702384112e-05, "loss": 0.9549, "step": 1716 }, { "epoch": 0.35206069304900556, "grad_norm": 1.2196333408355713, "learning_rate": 1.5038297568706244e-05, "loss": 0.9491, "step": 1717 }, { "epoch": 0.3522657371334837, "grad_norm": 1.3079006671905518, "learning_rate": 1.5032559212428621e-05, "loss": 0.8923, "step": 1718 }, { "epoch": 0.3524707812179619, "grad_norm": 1.1810883283615112, "learning_rate": 1.5026818636082672e-05, "loss": 0.8847, "step": 1719 }, { "epoch": 0.35267582530244, "grad_norm": 1.2558256387710571, "learning_rate": 1.5021075842200796e-05, "loss": 0.9176, "step": 1720 }, { "epoch": 0.3528808693869182, "grad_norm": 1.184658169746399, "learning_rate": 1.5015330833316376e-05, "loss": 0.9921, "step": 1721 }, { "epoch": 0.35308591347139634, "grad_norm": 1.2342078685760498, "learning_rate": 1.5009583611963772e-05, "loss": 0.8746, "step": 1722 }, { "epoch": 0.35329095755587453, "grad_norm": 1.2224928140640259, "learning_rate": 1.5003834180678316e-05, "loss": 0.8946, "step": 1723 }, { "epoch": 0.35349600164035266, "grad_norm": 1.3458856344223022, "learning_rate": 1.4998082541996324e-05, "loss": 1.0694, "step": 1724 }, { "epoch": 0.35370104572483085, "grad_norm": 1.2942243814468384, "learning_rate": 1.4992328698455075e-05, "loss": 0.9991, "step": 1725 }, { "epoch": 0.353906089809309, "grad_norm": 1.315830945968628, "learning_rate": 1.4986572652592827e-05, "loss": 0.8496, "step": 1726 }, { "epoch": 0.3541111338937872, "grad_norm": 1.2744020223617554, "learning_rate": 1.4980814406948806e-05, "loss": 0.9818, "step": 1727 }, { "epoch": 0.3543161779782653, "grad_norm": 1.267148494720459, "learning_rate": 1.4975053964063217e-05, "loss": 0.9421, "step": 1728 }, { "epoch": 0.3545212220627435, "grad_norm": 1.3045240640640259, "learning_rate": 1.496929132647722e-05, "loss": 0.9462, "step": 1729 }, { "epoch": 0.35472626614722164, "grad_norm": 1.2444361448287964, "learning_rate": 1.4963526496732952e-05, "loss": 0.9605, "step": 1730 }, { "epoch": 0.3549313102316998, "grad_norm": 1.3147176504135132, "learning_rate": 1.4957759477373519e-05, "loss": 0.9782, "step": 1731 }, { "epoch": 0.35513635431617796, "grad_norm": 1.2118706703186035, "learning_rate": 1.4951990270942991e-05, "loss": 0.9315, "step": 1732 }, { "epoch": 0.35534139840065615, "grad_norm": 1.3824220895767212, "learning_rate": 1.4946218879986398e-05, "loss": 0.9876, "step": 1733 }, { "epoch": 0.3555464424851343, "grad_norm": 1.3151408433914185, "learning_rate": 1.4940445307049736e-05, "loss": 0.9753, "step": 1734 }, { "epoch": 0.3557514865696125, "grad_norm": 1.3080613613128662, "learning_rate": 1.493466955467997e-05, "loss": 0.9763, "step": 1735 }, { "epoch": 0.3559565306540906, "grad_norm": 1.2422924041748047, "learning_rate": 1.4928891625425016e-05, "loss": 0.9861, "step": 1736 }, { "epoch": 0.3561615747385688, "grad_norm": 1.33841872215271, "learning_rate": 1.492311152183376e-05, "loss": 0.9828, "step": 1737 }, { "epoch": 0.35636661882304693, "grad_norm": 1.1971608400344849, "learning_rate": 1.491732924645604e-05, "loss": 0.956, "step": 1738 }, { "epoch": 0.3565716629075251, "grad_norm": 1.3977389335632324, "learning_rate": 1.4911544801842655e-05, "loss": 0.9301, "step": 1739 }, { "epoch": 0.35677670699200326, "grad_norm": 1.32041335105896, "learning_rate": 1.4905758190545365e-05, "loss": 0.9245, "step": 1740 }, { "epoch": 0.35698175107648145, "grad_norm": 1.3359309434890747, "learning_rate": 1.4899969415116875e-05, "loss": 1.0524, "step": 1741 }, { "epoch": 0.3571867951609596, "grad_norm": 1.366360068321228, "learning_rate": 1.4894178478110856e-05, "loss": 1.037, "step": 1742 }, { "epoch": 0.35739183924543777, "grad_norm": 1.2985830307006836, "learning_rate": 1.4888385382081926e-05, "loss": 0.9849, "step": 1743 }, { "epoch": 0.3575968833299159, "grad_norm": 1.1725878715515137, "learning_rate": 1.4882590129585661e-05, "loss": 0.9273, "step": 1744 }, { "epoch": 0.3578019274143941, "grad_norm": 1.3888438940048218, "learning_rate": 1.4876792723178576e-05, "loss": 0.9301, "step": 1745 }, { "epoch": 0.3580069714988723, "grad_norm": 1.326094388961792, "learning_rate": 1.4870993165418157e-05, "loss": 1.0259, "step": 1746 }, { "epoch": 0.3582120155833504, "grad_norm": 1.2540521621704102, "learning_rate": 1.4865191458862816e-05, "loss": 0.9847, "step": 1747 }, { "epoch": 0.3584170596678286, "grad_norm": 1.2326502799987793, "learning_rate": 1.4859387606071928e-05, "loss": 0.9851, "step": 1748 }, { "epoch": 0.35862210375230674, "grad_norm": 1.262442946434021, "learning_rate": 1.4853581609605813e-05, "loss": 0.9896, "step": 1749 }, { "epoch": 0.35882714783678493, "grad_norm": 1.3468549251556396, "learning_rate": 1.484777347202573e-05, "loss": 0.9503, "step": 1750 }, { "epoch": 0.35903219192126307, "grad_norm": 1.2723509073257446, "learning_rate": 1.484196319589389e-05, "loss": 0.9782, "step": 1751 }, { "epoch": 0.35923723600574126, "grad_norm": 1.4243875741958618, "learning_rate": 1.4836150783773442e-05, "loss": 1.0215, "step": 1752 }, { "epoch": 0.3594422800902194, "grad_norm": 1.364362120628357, "learning_rate": 1.483033623822848e-05, "loss": 1.0366, "step": 1753 }, { "epoch": 0.3596473241746976, "grad_norm": 1.3571218252182007, "learning_rate": 1.4824519561824037e-05, "loss": 0.9354, "step": 1754 }, { "epoch": 0.3598523682591757, "grad_norm": 1.319419503211975, "learning_rate": 1.4818700757126095e-05, "loss": 0.8293, "step": 1755 }, { "epoch": 0.3600574123436539, "grad_norm": 1.2852988243103027, "learning_rate": 1.4812879826701555e-05, "loss": 1.0086, "step": 1756 }, { "epoch": 0.36026245642813204, "grad_norm": 1.2400274276733398, "learning_rate": 1.4807056773118276e-05, "loss": 0.9355, "step": 1757 }, { "epoch": 0.36046750051261023, "grad_norm": 1.3238695859909058, "learning_rate": 1.480123159894505e-05, "loss": 0.9742, "step": 1758 }, { "epoch": 0.36067254459708836, "grad_norm": 1.2706485986709595, "learning_rate": 1.479540430675159e-05, "loss": 0.9672, "step": 1759 }, { "epoch": 0.36087758868156655, "grad_norm": 1.285881519317627, "learning_rate": 1.478957489910856e-05, "loss": 1.0232, "step": 1760 }, { "epoch": 0.3610826327660447, "grad_norm": 1.3372666835784912, "learning_rate": 1.4783743378587549e-05, "loss": 1.058, "step": 1761 }, { "epoch": 0.3612876768505229, "grad_norm": 1.2520592212677002, "learning_rate": 1.4777909747761085e-05, "loss": 0.9648, "step": 1762 }, { "epoch": 0.361492720935001, "grad_norm": 1.4367153644561768, "learning_rate": 1.4772074009202612e-05, "loss": 0.9716, "step": 1763 }, { "epoch": 0.3616977650194792, "grad_norm": 1.2416092157363892, "learning_rate": 1.4766236165486526e-05, "loss": 0.8755, "step": 1764 }, { "epoch": 0.36190280910395733, "grad_norm": 1.3018829822540283, "learning_rate": 1.4760396219188126e-05, "loss": 0.9822, "step": 1765 }, { "epoch": 0.3621078531884355, "grad_norm": 1.2463350296020508, "learning_rate": 1.4754554172883662e-05, "loss": 0.9632, "step": 1766 }, { "epoch": 0.36231289727291366, "grad_norm": 1.2744200229644775, "learning_rate": 1.4748710029150296e-05, "loss": 0.9548, "step": 1767 }, { "epoch": 0.36251794135739185, "grad_norm": 1.225520133972168, "learning_rate": 1.4742863790566117e-05, "loss": 0.9186, "step": 1768 }, { "epoch": 0.36272298544187, "grad_norm": 1.295256495475769, "learning_rate": 1.4737015459710148e-05, "loss": 0.9153, "step": 1769 }, { "epoch": 0.3629280295263482, "grad_norm": 1.2686809301376343, "learning_rate": 1.4731165039162322e-05, "loss": 0.94, "step": 1770 }, { "epoch": 0.3631330736108263, "grad_norm": 1.279422402381897, "learning_rate": 1.4725312531503504e-05, "loss": 0.9038, "step": 1771 }, { "epoch": 0.3633381176953045, "grad_norm": 1.2642850875854492, "learning_rate": 1.4719457939315468e-05, "loss": 0.9923, "step": 1772 }, { "epoch": 0.36354316177978263, "grad_norm": 1.3928601741790771, "learning_rate": 1.4713601265180924e-05, "loss": 0.9175, "step": 1773 }, { "epoch": 0.3637482058642608, "grad_norm": 1.3448349237442017, "learning_rate": 1.470774251168348e-05, "loss": 0.9285, "step": 1774 }, { "epoch": 0.36395324994873895, "grad_norm": 1.2764503955841064, "learning_rate": 1.4701881681407684e-05, "loss": 0.9653, "step": 1775 }, { "epoch": 0.36415829403321714, "grad_norm": 1.3012465238571167, "learning_rate": 1.4696018776938983e-05, "loss": 0.9784, "step": 1776 }, { "epoch": 0.3643633381176953, "grad_norm": 1.3586992025375366, "learning_rate": 1.4690153800863743e-05, "loss": 1.0366, "step": 1777 }, { "epoch": 0.36456838220217347, "grad_norm": 1.319172739982605, "learning_rate": 1.468428675576925e-05, "loss": 0.9484, "step": 1778 }, { "epoch": 0.3647734262866516, "grad_norm": 1.3427135944366455, "learning_rate": 1.4678417644243695e-05, "loss": 1.0242, "step": 1779 }, { "epoch": 0.3649784703711298, "grad_norm": 1.3881100416183472, "learning_rate": 1.4672546468876187e-05, "loss": 0.8773, "step": 1780 }, { "epoch": 0.365183514455608, "grad_norm": 1.2211986780166626, "learning_rate": 1.4666673232256738e-05, "loss": 0.9097, "step": 1781 }, { "epoch": 0.3653885585400861, "grad_norm": 1.3243157863616943, "learning_rate": 1.4660797936976278e-05, "loss": 0.9389, "step": 1782 }, { "epoch": 0.3655936026245643, "grad_norm": 1.2728140354156494, "learning_rate": 1.4654920585626637e-05, "loss": 0.9653, "step": 1783 }, { "epoch": 0.36579864670904244, "grad_norm": 1.2410385608673096, "learning_rate": 1.4649041180800559e-05, "loss": 0.9061, "step": 1784 }, { "epoch": 0.36600369079352063, "grad_norm": 1.1761537790298462, "learning_rate": 1.4643159725091689e-05, "loss": 1.0488, "step": 1785 }, { "epoch": 0.36620873487799877, "grad_norm": 1.3115780353546143, "learning_rate": 1.4637276221094577e-05, "loss": 0.9379, "step": 1786 }, { "epoch": 0.36641377896247695, "grad_norm": 1.3038735389709473, "learning_rate": 1.4631390671404682e-05, "loss": 1.0116, "step": 1787 }, { "epoch": 0.3666188230469551, "grad_norm": 1.1911516189575195, "learning_rate": 1.4625503078618355e-05, "loss": 0.8608, "step": 1788 }, { "epoch": 0.3668238671314333, "grad_norm": 1.297575831413269, "learning_rate": 1.4619613445332863e-05, "loss": 0.9266, "step": 1789 }, { "epoch": 0.3670289112159114, "grad_norm": 1.250009536743164, "learning_rate": 1.461372177414636e-05, "loss": 1.0085, "step": 1790 }, { "epoch": 0.3672339553003896, "grad_norm": 1.19649338722229, "learning_rate": 1.4607828067657903e-05, "loss": 0.8864, "step": 1791 }, { "epoch": 0.36743899938486774, "grad_norm": 1.2599154710769653, "learning_rate": 1.460193232846745e-05, "loss": 0.9559, "step": 1792 }, { "epoch": 0.3676440434693459, "grad_norm": 1.3519823551177979, "learning_rate": 1.4596034559175851e-05, "loss": 0.9945, "step": 1793 }, { "epoch": 0.36784908755382406, "grad_norm": 1.2166805267333984, "learning_rate": 1.4590134762384858e-05, "loss": 0.9407, "step": 1794 }, { "epoch": 0.36805413163830225, "grad_norm": 1.3162742853164673, "learning_rate": 1.4584232940697108e-05, "loss": 0.9366, "step": 1795 }, { "epoch": 0.3682591757227804, "grad_norm": 1.3107404708862305, "learning_rate": 1.4578329096716145e-05, "loss": 1.0171, "step": 1796 }, { "epoch": 0.3684642198072586, "grad_norm": 1.2321562767028809, "learning_rate": 1.4572423233046386e-05, "loss": 0.8832, "step": 1797 }, { "epoch": 0.3686692638917367, "grad_norm": 1.2134662866592407, "learning_rate": 1.456651535229316e-05, "loss": 0.9173, "step": 1798 }, { "epoch": 0.3688743079762149, "grad_norm": 1.173237919807434, "learning_rate": 1.4560605457062666e-05, "loss": 0.8836, "step": 1799 }, { "epoch": 0.36907935206069303, "grad_norm": 1.4593405723571777, "learning_rate": 1.4554693549962008e-05, "loss": 0.9804, "step": 1800 }, { "epoch": 0.3692843961451712, "grad_norm": 1.3303368091583252, "learning_rate": 1.454877963359917e-05, "loss": 0.9333, "step": 1801 }, { "epoch": 0.36948944022964936, "grad_norm": 1.206833004951477, "learning_rate": 1.4542863710583022e-05, "loss": 0.9683, "step": 1802 }, { "epoch": 0.36969448431412755, "grad_norm": 1.2408477067947388, "learning_rate": 1.453694578352332e-05, "loss": 0.9398, "step": 1803 }, { "epoch": 0.3698995283986057, "grad_norm": 1.311044692993164, "learning_rate": 1.45310258550307e-05, "loss": 0.9782, "step": 1804 }, { "epoch": 0.37010457248308387, "grad_norm": 1.321362018585205, "learning_rate": 1.4525103927716697e-05, "loss": 0.9704, "step": 1805 }, { "epoch": 0.370309616567562, "grad_norm": 1.3915597200393677, "learning_rate": 1.4519180004193704e-05, "loss": 0.9935, "step": 1806 }, { "epoch": 0.3705146606520402, "grad_norm": 1.2630432844161987, "learning_rate": 1.4513254087075015e-05, "loss": 1.0311, "step": 1807 }, { "epoch": 0.37071970473651833, "grad_norm": 1.2967944145202637, "learning_rate": 1.4507326178974789e-05, "loss": 1.0106, "step": 1808 }, { "epoch": 0.3709247488209965, "grad_norm": 1.3661316633224487, "learning_rate": 1.4501396282508075e-05, "loss": 0.9614, "step": 1809 }, { "epoch": 0.37112979290547465, "grad_norm": 1.2680962085723877, "learning_rate": 1.4495464400290791e-05, "loss": 0.9912, "step": 1810 }, { "epoch": 0.37133483698995284, "grad_norm": 1.2887825965881348, "learning_rate": 1.4489530534939734e-05, "loss": 0.9642, "step": 1811 }, { "epoch": 0.371539881074431, "grad_norm": 1.2298495769500732, "learning_rate": 1.4483594689072571e-05, "loss": 0.9393, "step": 1812 }, { "epoch": 0.37174492515890917, "grad_norm": 1.317403793334961, "learning_rate": 1.4477656865307856e-05, "loss": 0.9953, "step": 1813 }, { "epoch": 0.3719499692433873, "grad_norm": 1.273972511291504, "learning_rate": 1.4471717066265e-05, "loss": 0.9514, "step": 1814 }, { "epoch": 0.3721550133278655, "grad_norm": 1.251021385192871, "learning_rate": 1.4465775294564294e-05, "loss": 0.9325, "step": 1815 }, { "epoch": 0.3723600574123437, "grad_norm": 1.2516521215438843, "learning_rate": 1.4459831552826897e-05, "loss": 0.9304, "step": 1816 }, { "epoch": 0.3725651014968218, "grad_norm": 1.2781394720077515, "learning_rate": 1.4453885843674837e-05, "loss": 0.964, "step": 1817 }, { "epoch": 0.3727701455813, "grad_norm": 1.2705529928207397, "learning_rate": 1.444793816973101e-05, "loss": 0.9125, "step": 1818 }, { "epoch": 0.37297518966577814, "grad_norm": 1.2690311670303345, "learning_rate": 1.4441988533619182e-05, "loss": 0.9524, "step": 1819 }, { "epoch": 0.37318023375025633, "grad_norm": 1.364837646484375, "learning_rate": 1.4436036937963976e-05, "loss": 0.9392, "step": 1820 }, { "epoch": 0.37338527783473446, "grad_norm": 1.3432884216308594, "learning_rate": 1.4430083385390892e-05, "loss": 0.9292, "step": 1821 }, { "epoch": 0.37359032191921265, "grad_norm": 1.2968240976333618, "learning_rate": 1.4424127878526278e-05, "loss": 0.9208, "step": 1822 }, { "epoch": 0.3737953660036908, "grad_norm": 1.3577295541763306, "learning_rate": 1.4418170419997362e-05, "loss": 0.9149, "step": 1823 }, { "epoch": 0.374000410088169, "grad_norm": 1.2991958856582642, "learning_rate": 1.4412211012432213e-05, "loss": 0.8944, "step": 1824 }, { "epoch": 0.3742054541726471, "grad_norm": 1.3467276096343994, "learning_rate": 1.4406249658459781e-05, "loss": 0.8826, "step": 1825 }, { "epoch": 0.3744104982571253, "grad_norm": 1.3412052392959595, "learning_rate": 1.4400286360709855e-05, "loss": 0.9637, "step": 1826 }, { "epoch": 0.37461554234160344, "grad_norm": 1.2783997058868408, "learning_rate": 1.4394321121813093e-05, "loss": 0.9749, "step": 1827 }, { "epoch": 0.3748205864260816, "grad_norm": 1.249147653579712, "learning_rate": 1.4388353944401008e-05, "loss": 0.9431, "step": 1828 }, { "epoch": 0.37502563051055976, "grad_norm": 1.2489467859268188, "learning_rate": 1.4382384831105966e-05, "loss": 1.0202, "step": 1829 }, { "epoch": 0.37523067459503795, "grad_norm": 1.3015694618225098, "learning_rate": 1.437641378456119e-05, "loss": 0.9919, "step": 1830 }, { "epoch": 0.3754357186795161, "grad_norm": 1.4338197708129883, "learning_rate": 1.4370440807400747e-05, "loss": 0.9597, "step": 1831 }, { "epoch": 0.3756407627639943, "grad_norm": 1.304438591003418, "learning_rate": 1.436446590225957e-05, "loss": 0.9828, "step": 1832 }, { "epoch": 0.3758458068484724, "grad_norm": 1.2993085384368896, "learning_rate": 1.435848907177343e-05, "loss": 1.0023, "step": 1833 }, { "epoch": 0.3760508509329506, "grad_norm": 1.2932696342468262, "learning_rate": 1.4352510318578952e-05, "loss": 0.9385, "step": 1834 }, { "epoch": 0.37625589501742873, "grad_norm": 1.2821613550186157, "learning_rate": 1.4346529645313611e-05, "loss": 0.9077, "step": 1835 }, { "epoch": 0.3764609391019069, "grad_norm": 1.245803952217102, "learning_rate": 1.4340547054615729e-05, "loss": 0.9499, "step": 1836 }, { "epoch": 0.37666598318638506, "grad_norm": 1.1985504627227783, "learning_rate": 1.433456254912447e-05, "loss": 0.8841, "step": 1837 }, { "epoch": 0.37687102727086325, "grad_norm": 1.2969504594802856, "learning_rate": 1.4328576131479844e-05, "loss": 0.9369, "step": 1838 }, { "epoch": 0.3770760713553414, "grad_norm": 1.1787619590759277, "learning_rate": 1.4322587804322705e-05, "loss": 0.9037, "step": 1839 }, { "epoch": 0.37728111543981957, "grad_norm": 1.316200613975525, "learning_rate": 1.4316597570294755e-05, "loss": 1.0087, "step": 1840 }, { "epoch": 0.3774861595242977, "grad_norm": 1.3462406396865845, "learning_rate": 1.4310605432038527e-05, "loss": 0.9397, "step": 1841 }, { "epoch": 0.3776912036087759, "grad_norm": 1.2647439241409302, "learning_rate": 1.4304611392197399e-05, "loss": 0.9408, "step": 1842 }, { "epoch": 0.37789624769325403, "grad_norm": 1.3463785648345947, "learning_rate": 1.429861545341559e-05, "loss": 0.9345, "step": 1843 }, { "epoch": 0.3781012917777322, "grad_norm": 1.2587939500808716, "learning_rate": 1.4292617618338154e-05, "loss": 0.861, "step": 1844 }, { "epoch": 0.37830633586221035, "grad_norm": 1.2840384244918823, "learning_rate": 1.4286617889610982e-05, "loss": 0.9854, "step": 1845 }, { "epoch": 0.37851137994668854, "grad_norm": 1.2326374053955078, "learning_rate": 1.42806162698808e-05, "loss": 0.9731, "step": 1846 }, { "epoch": 0.3787164240311667, "grad_norm": 1.3280577659606934, "learning_rate": 1.427461276179517e-05, "loss": 0.9479, "step": 1847 }, { "epoch": 0.37892146811564487, "grad_norm": 1.350959062576294, "learning_rate": 1.4268607368002485e-05, "loss": 1.0038, "step": 1848 }, { "epoch": 0.379126512200123, "grad_norm": 1.3463613986968994, "learning_rate": 1.4262600091151968e-05, "loss": 1.0242, "step": 1849 }, { "epoch": 0.3793315562846012, "grad_norm": 1.2990541458129883, "learning_rate": 1.425659093389368e-05, "loss": 1.0085, "step": 1850 }, { "epoch": 0.3795366003690793, "grad_norm": 1.296902060508728, "learning_rate": 1.4250579898878502e-05, "loss": 0.9761, "step": 1851 }, { "epoch": 0.3797416444535575, "grad_norm": 1.2751325368881226, "learning_rate": 1.4244566988758152e-05, "loss": 1.013, "step": 1852 }, { "epoch": 0.3799466885380357, "grad_norm": 1.2996301651000977, "learning_rate": 1.423855220618517e-05, "loss": 0.9601, "step": 1853 }, { "epoch": 0.38015173262251384, "grad_norm": 1.2455748319625854, "learning_rate": 1.4232535553812923e-05, "loss": 0.9271, "step": 1854 }, { "epoch": 0.38035677670699203, "grad_norm": 1.3276793956756592, "learning_rate": 1.4226517034295603e-05, "loss": 1.0052, "step": 1855 }, { "epoch": 0.38056182079147016, "grad_norm": 1.2338591814041138, "learning_rate": 1.4220496650288227e-05, "loss": 0.873, "step": 1856 }, { "epoch": 0.38076686487594835, "grad_norm": 1.2063429355621338, "learning_rate": 1.4214474404446633e-05, "loss": 0.8944, "step": 1857 }, { "epoch": 0.3809719089604265, "grad_norm": 1.3398531675338745, "learning_rate": 1.4208450299427478e-05, "loss": 1.0024, "step": 1858 }, { "epoch": 0.3811769530449047, "grad_norm": 1.3219271898269653, "learning_rate": 1.4202424337888248e-05, "loss": 0.8687, "step": 1859 }, { "epoch": 0.3813819971293828, "grad_norm": 1.267942190170288, "learning_rate": 1.4196396522487236e-05, "loss": 0.9578, "step": 1860 }, { "epoch": 0.381587041213861, "grad_norm": 1.2608972787857056, "learning_rate": 1.4190366855883562e-05, "loss": 0.8993, "step": 1861 }, { "epoch": 0.38179208529833913, "grad_norm": 1.3083988428115845, "learning_rate": 1.4184335340737158e-05, "loss": 0.9593, "step": 1862 }, { "epoch": 0.3819971293828173, "grad_norm": 1.2203272581100464, "learning_rate": 1.417830197970877e-05, "loss": 0.9608, "step": 1863 }, { "epoch": 0.38220217346729546, "grad_norm": 1.1692593097686768, "learning_rate": 1.4172266775459966e-05, "loss": 0.9311, "step": 1864 }, { "epoch": 0.38240721755177365, "grad_norm": 1.2169774770736694, "learning_rate": 1.416622973065312e-05, "loss": 0.9316, "step": 1865 }, { "epoch": 0.3826122616362518, "grad_norm": 1.2857475280761719, "learning_rate": 1.416019084795142e-05, "loss": 0.9576, "step": 1866 }, { "epoch": 0.38281730572073, "grad_norm": 1.2858530282974243, "learning_rate": 1.4154150130018867e-05, "loss": 0.863, "step": 1867 }, { "epoch": 0.3830223498052081, "grad_norm": 1.202562689781189, "learning_rate": 1.4148107579520264e-05, "loss": 0.9043, "step": 1868 }, { "epoch": 0.3832273938896863, "grad_norm": 1.4080147743225098, "learning_rate": 1.4142063199121234e-05, "loss": 0.9984, "step": 1869 }, { "epoch": 0.38343243797416443, "grad_norm": 1.2759766578674316, "learning_rate": 1.41360169914882e-05, "loss": 0.9095, "step": 1870 }, { "epoch": 0.3836374820586426, "grad_norm": 1.3308407068252563, "learning_rate": 1.4129968959288387e-05, "loss": 0.9212, "step": 1871 }, { "epoch": 0.38384252614312075, "grad_norm": 1.4167609214782715, "learning_rate": 1.4123919105189836e-05, "loss": 0.9778, "step": 1872 }, { "epoch": 0.38404757022759894, "grad_norm": 1.2198134660720825, "learning_rate": 1.4117867431861385e-05, "loss": 0.8135, "step": 1873 }, { "epoch": 0.3842526143120771, "grad_norm": 1.2067545652389526, "learning_rate": 1.4111813941972672e-05, "loss": 0.9207, "step": 1874 }, { "epoch": 0.38445765839655527, "grad_norm": 1.290881633758545, "learning_rate": 1.4105758638194145e-05, "loss": 0.9365, "step": 1875 }, { "epoch": 0.3846627024810334, "grad_norm": 1.2712018489837646, "learning_rate": 1.4099701523197043e-05, "loss": 0.9887, "step": 1876 }, { "epoch": 0.3848677465655116, "grad_norm": 1.2856804132461548, "learning_rate": 1.4093642599653406e-05, "loss": 0.9081, "step": 1877 }, { "epoch": 0.3850727906499897, "grad_norm": 1.3155959844589233, "learning_rate": 1.408758187023608e-05, "loss": 0.9579, "step": 1878 }, { "epoch": 0.3852778347344679, "grad_norm": 1.2560666799545288, "learning_rate": 1.4081519337618696e-05, "loss": 1.0035, "step": 1879 }, { "epoch": 0.38548287881894605, "grad_norm": 1.2600371837615967, "learning_rate": 1.4075455004475691e-05, "loss": 0.9589, "step": 1880 }, { "epoch": 0.38568792290342424, "grad_norm": 1.254637360572815, "learning_rate": 1.4069388873482283e-05, "loss": 0.9281, "step": 1881 }, { "epoch": 0.3858929669879024, "grad_norm": 1.3134702444076538, "learning_rate": 1.40633209473145e-05, "loss": 1.0082, "step": 1882 }, { "epoch": 0.38609801107238056, "grad_norm": 1.1321702003479004, "learning_rate": 1.4057251228649147e-05, "loss": 0.9081, "step": 1883 }, { "epoch": 0.3863030551568587, "grad_norm": 1.3273357152938843, "learning_rate": 1.4051179720163832e-05, "loss": 0.9524, "step": 1884 }, { "epoch": 0.3865080992413369, "grad_norm": 1.3819514513015747, "learning_rate": 1.4045106424536938e-05, "loss": 0.8947, "step": 1885 }, { "epoch": 0.386713143325815, "grad_norm": 1.3835349082946777, "learning_rate": 1.4039031344447653e-05, "loss": 0.9769, "step": 1886 }, { "epoch": 0.3869181874102932, "grad_norm": 1.2099319696426392, "learning_rate": 1.4032954482575938e-05, "loss": 0.9309, "step": 1887 }, { "epoch": 0.3871232314947714, "grad_norm": 1.4428573846817017, "learning_rate": 1.4026875841602549e-05, "loss": 1.0004, "step": 1888 }, { "epoch": 0.38732827557924954, "grad_norm": 1.2455413341522217, "learning_rate": 1.4020795424209026e-05, "loss": 0.881, "step": 1889 }, { "epoch": 0.3875333196637277, "grad_norm": 1.4097583293914795, "learning_rate": 1.4014713233077689e-05, "loss": 0.9958, "step": 1890 }, { "epoch": 0.38773836374820586, "grad_norm": 1.384072184562683, "learning_rate": 1.4008629270891639e-05, "loss": 0.9661, "step": 1891 }, { "epoch": 0.38794340783268405, "grad_norm": 1.163151741027832, "learning_rate": 1.4002543540334766e-05, "loss": 0.9211, "step": 1892 }, { "epoch": 0.3881484519171622, "grad_norm": 1.2755881547927856, "learning_rate": 1.3996456044091728e-05, "loss": 0.9842, "step": 1893 }, { "epoch": 0.3883534960016404, "grad_norm": 1.3739274740219116, "learning_rate": 1.3990366784847979e-05, "loss": 0.9439, "step": 1894 }, { "epoch": 0.3885585400861185, "grad_norm": 1.294244647026062, "learning_rate": 1.3984275765289737e-05, "loss": 0.9656, "step": 1895 }, { "epoch": 0.3887635841705967, "grad_norm": 1.3237314224243164, "learning_rate": 1.3978182988103996e-05, "loss": 0.9369, "step": 1896 }, { "epoch": 0.38896862825507483, "grad_norm": 1.3072999715805054, "learning_rate": 1.3972088455978537e-05, "loss": 0.9395, "step": 1897 }, { "epoch": 0.389173672339553, "grad_norm": 1.184841513633728, "learning_rate": 1.3965992171601904e-05, "loss": 0.9084, "step": 1898 }, { "epoch": 0.38937871642403116, "grad_norm": 1.2614436149597168, "learning_rate": 1.3959894137663418e-05, "loss": 1.0269, "step": 1899 }, { "epoch": 0.38958376050850935, "grad_norm": 1.3419371843338013, "learning_rate": 1.3953794356853173e-05, "loss": 0.9663, "step": 1900 }, { "epoch": 0.3897888045929875, "grad_norm": 1.3373677730560303, "learning_rate": 1.3947692831862037e-05, "loss": 0.9302, "step": 1901 }, { "epoch": 0.38999384867746567, "grad_norm": 1.217854619026184, "learning_rate": 1.3941589565381635e-05, "loss": 0.8771, "step": 1902 }, { "epoch": 0.3901988927619438, "grad_norm": 1.4555367231369019, "learning_rate": 1.3935484560104374e-05, "loss": 0.9516, "step": 1903 }, { "epoch": 0.390403936846422, "grad_norm": 1.2827543020248413, "learning_rate": 1.392937781872342e-05, "loss": 0.9263, "step": 1904 }, { "epoch": 0.39060898093090013, "grad_norm": 1.3494203090667725, "learning_rate": 1.3923269343932703e-05, "loss": 0.9753, "step": 1905 }, { "epoch": 0.3908140250153783, "grad_norm": 1.3406789302825928, "learning_rate": 1.391715913842693e-05, "loss": 0.9061, "step": 1906 }, { "epoch": 0.39101906909985645, "grad_norm": 1.24745774269104, "learning_rate": 1.391104720490156e-05, "loss": 0.8915, "step": 1907 }, { "epoch": 0.39122411318433464, "grad_norm": 1.2564518451690674, "learning_rate": 1.3904933546052818e-05, "loss": 0.8955, "step": 1908 }, { "epoch": 0.3914291572688128, "grad_norm": 1.2469788789749146, "learning_rate": 1.3898818164577688e-05, "loss": 1.0164, "step": 1909 }, { "epoch": 0.39163420135329097, "grad_norm": 1.2384402751922607, "learning_rate": 1.3892701063173917e-05, "loss": 0.9763, "step": 1910 }, { "epoch": 0.3918392454377691, "grad_norm": 1.2035483121871948, "learning_rate": 1.3886582244540009e-05, "loss": 1.0108, "step": 1911 }, { "epoch": 0.3920442895222473, "grad_norm": 1.2260626554489136, "learning_rate": 1.3880461711375224e-05, "loss": 1.0489, "step": 1912 }, { "epoch": 0.3922493336067254, "grad_norm": 1.4094996452331543, "learning_rate": 1.3874339466379585e-05, "loss": 0.9318, "step": 1913 }, { "epoch": 0.3924543776912036, "grad_norm": 1.2896758317947388, "learning_rate": 1.386821551225386e-05, "loss": 0.9327, "step": 1914 }, { "epoch": 0.39265942177568175, "grad_norm": 1.3178086280822754, "learning_rate": 1.3862089851699578e-05, "loss": 0.9754, "step": 1915 }, { "epoch": 0.39286446586015994, "grad_norm": 1.330449104309082, "learning_rate": 1.3855962487419023e-05, "loss": 0.9163, "step": 1916 }, { "epoch": 0.3930695099446381, "grad_norm": 1.1666902303695679, "learning_rate": 1.3849833422115221e-05, "loss": 0.9529, "step": 1917 }, { "epoch": 0.39327455402911626, "grad_norm": 1.2879537343978882, "learning_rate": 1.3843702658491961e-05, "loss": 0.9363, "step": 1918 }, { "epoch": 0.3934795981135944, "grad_norm": 1.2503247261047363, "learning_rate": 1.3837570199253766e-05, "loss": 0.8823, "step": 1919 }, { "epoch": 0.3936846421980726, "grad_norm": 1.3109687566757202, "learning_rate": 1.3831436047105924e-05, "loss": 0.9361, "step": 1920 }, { "epoch": 0.3938896862825507, "grad_norm": 1.2573846578598022, "learning_rate": 1.3825300204754455e-05, "loss": 0.9311, "step": 1921 }, { "epoch": 0.3940947303670289, "grad_norm": 1.2524105310440063, "learning_rate": 1.3819162674906134e-05, "loss": 0.9731, "step": 1922 }, { "epoch": 0.3942997744515071, "grad_norm": 1.3586901426315308, "learning_rate": 1.3813023460268475e-05, "loss": 1.0068, "step": 1923 }, { "epoch": 0.39450481853598524, "grad_norm": 1.34584641456604, "learning_rate": 1.380688256354974e-05, "loss": 0.9087, "step": 1924 }, { "epoch": 0.3947098626204634, "grad_norm": 1.2725759744644165, "learning_rate": 1.3800739987458924e-05, "loss": 0.9915, "step": 1925 }, { "epoch": 0.39491490670494156, "grad_norm": 1.388970971107483, "learning_rate": 1.379459573470578e-05, "loss": 0.9177, "step": 1926 }, { "epoch": 0.39511995078941975, "grad_norm": 1.4258142709732056, "learning_rate": 1.378844980800078e-05, "loss": 0.9551, "step": 1927 }, { "epoch": 0.3953249948738979, "grad_norm": 1.248126745223999, "learning_rate": 1.3782302210055149e-05, "loss": 0.923, "step": 1928 }, { "epoch": 0.3955300389583761, "grad_norm": 1.3328819274902344, "learning_rate": 1.3776152943580846e-05, "loss": 1.0205, "step": 1929 }, { "epoch": 0.3957350830428542, "grad_norm": 1.2237831354141235, "learning_rate": 1.3770002011290562e-05, "loss": 0.969, "step": 1930 }, { "epoch": 0.3959401271273324, "grad_norm": 1.233378529548645, "learning_rate": 1.3763849415897728e-05, "loss": 0.9523, "step": 1931 }, { "epoch": 0.39614517121181053, "grad_norm": 1.2162634134292603, "learning_rate": 1.3757695160116502e-05, "loss": 0.9495, "step": 1932 }, { "epoch": 0.3963502152962887, "grad_norm": 1.3221814632415771, "learning_rate": 1.3751539246661783e-05, "loss": 0.9464, "step": 1933 }, { "epoch": 0.39655525938076686, "grad_norm": 1.3487327098846436, "learning_rate": 1.3745381678249195e-05, "loss": 0.948, "step": 1934 }, { "epoch": 0.39676030346524505, "grad_norm": 1.3002903461456299, "learning_rate": 1.3739222457595093e-05, "loss": 0.9797, "step": 1935 }, { "epoch": 0.3969653475497232, "grad_norm": 1.240433692932129, "learning_rate": 1.3733061587416567e-05, "loss": 0.9159, "step": 1936 }, { "epoch": 0.39717039163420137, "grad_norm": 1.468238353729248, "learning_rate": 1.3726899070431423e-05, "loss": 0.9964, "step": 1937 }, { "epoch": 0.3973754357186795, "grad_norm": 1.3094573020935059, "learning_rate": 1.3720734909358204e-05, "loss": 0.8866, "step": 1938 }, { "epoch": 0.3975804798031577, "grad_norm": 1.287270426750183, "learning_rate": 1.371456910691617e-05, "loss": 0.9888, "step": 1939 }, { "epoch": 0.39778552388763583, "grad_norm": 1.3031202554702759, "learning_rate": 1.3708401665825319e-05, "loss": 0.9976, "step": 1940 }, { "epoch": 0.397990567972114, "grad_norm": 1.248643159866333, "learning_rate": 1.3702232588806354e-05, "loss": 0.9326, "step": 1941 }, { "epoch": 0.39819561205659215, "grad_norm": 1.3708044290542603, "learning_rate": 1.3696061878580707e-05, "loss": 1.0235, "step": 1942 }, { "epoch": 0.39840065614107034, "grad_norm": 1.389230728149414, "learning_rate": 1.3689889537870537e-05, "loss": 0.9773, "step": 1943 }, { "epoch": 0.3986057002255485, "grad_norm": 1.2065953016281128, "learning_rate": 1.3683715569398714e-05, "loss": 0.9004, "step": 1944 }, { "epoch": 0.39881074431002667, "grad_norm": 1.2191917896270752, "learning_rate": 1.3677539975888828e-05, "loss": 0.9713, "step": 1945 }, { "epoch": 0.3990157883945048, "grad_norm": 1.1945184469223022, "learning_rate": 1.3671362760065188e-05, "loss": 1.0415, "step": 1946 }, { "epoch": 0.399220832478983, "grad_norm": 1.2885630130767822, "learning_rate": 1.3665183924652817e-05, "loss": 0.9327, "step": 1947 }, { "epoch": 0.3994258765634611, "grad_norm": 1.215341329574585, "learning_rate": 1.3659003472377453e-05, "loss": 0.9302, "step": 1948 }, { "epoch": 0.3996309206479393, "grad_norm": 1.2615835666656494, "learning_rate": 1.3652821405965546e-05, "loss": 0.8639, "step": 1949 }, { "epoch": 0.39983596473241745, "grad_norm": 1.3547414541244507, "learning_rate": 1.3646637728144259e-05, "loss": 0.9387, "step": 1950 }, { "epoch": 0.40004100881689564, "grad_norm": 1.3475233316421509, "learning_rate": 1.3640452441641466e-05, "loss": 0.9359, "step": 1951 }, { "epoch": 0.40024605290137377, "grad_norm": 1.1840912103652954, "learning_rate": 1.3634265549185755e-05, "loss": 0.9109, "step": 1952 }, { "epoch": 0.40045109698585196, "grad_norm": 1.2599481344223022, "learning_rate": 1.362807705350641e-05, "loss": 0.9767, "step": 1953 }, { "epoch": 0.4006561410703301, "grad_norm": 1.2696270942687988, "learning_rate": 1.362188695733344e-05, "loss": 0.9726, "step": 1954 }, { "epoch": 0.4008611851548083, "grad_norm": 1.2692196369171143, "learning_rate": 1.361569526339754e-05, "loss": 0.905, "step": 1955 }, { "epoch": 0.4010662292392864, "grad_norm": 1.1977014541625977, "learning_rate": 1.360950197443013e-05, "loss": 0.9575, "step": 1956 }, { "epoch": 0.4012712733237646, "grad_norm": 1.2939395904541016, "learning_rate": 1.3603307093163319e-05, "loss": 0.9381, "step": 1957 }, { "epoch": 0.4014763174082428, "grad_norm": 1.2537357807159424, "learning_rate": 1.359711062232992e-05, "loss": 0.9775, "step": 1958 }, { "epoch": 0.40168136149272093, "grad_norm": 1.2243276834487915, "learning_rate": 1.3590912564663457e-05, "loss": 0.9808, "step": 1959 }, { "epoch": 0.4018864055771991, "grad_norm": 1.2946988344192505, "learning_rate": 1.3584712922898143e-05, "loss": 0.9512, "step": 1960 }, { "epoch": 0.40209144966167726, "grad_norm": 1.267085075378418, "learning_rate": 1.3578511699768897e-05, "loss": 0.9861, "step": 1961 }, { "epoch": 0.40229649374615545, "grad_norm": 1.2455105781555176, "learning_rate": 1.3572308898011328e-05, "loss": 0.97, "step": 1962 }, { "epoch": 0.4025015378306336, "grad_norm": 1.276058316230774, "learning_rate": 1.3566104520361757e-05, "loss": 0.9981, "step": 1963 }, { "epoch": 0.40270658191511177, "grad_norm": 1.3415942192077637, "learning_rate": 1.3559898569557178e-05, "loss": 0.9236, "step": 1964 }, { "epoch": 0.4029116259995899, "grad_norm": 1.2153782844543457, "learning_rate": 1.3553691048335296e-05, "loss": 0.8338, "step": 1965 }, { "epoch": 0.4031166700840681, "grad_norm": 1.2991911172866821, "learning_rate": 1.35474819594345e-05, "loss": 0.9714, "step": 1966 }, { "epoch": 0.40332171416854623, "grad_norm": 1.3274779319763184, "learning_rate": 1.3541271305593878e-05, "loss": 0.9843, "step": 1967 }, { "epoch": 0.4035267582530244, "grad_norm": 1.2511221170425415, "learning_rate": 1.3535059089553204e-05, "loss": 0.9295, "step": 1968 }, { "epoch": 0.40373180233750255, "grad_norm": 1.2284562587738037, "learning_rate": 1.3528845314052937e-05, "loss": 0.8231, "step": 1969 }, { "epoch": 0.40393684642198074, "grad_norm": 1.1660737991333008, "learning_rate": 1.3522629981834234e-05, "loss": 0.9233, "step": 1970 }, { "epoch": 0.4041418905064589, "grad_norm": 1.2390059232711792, "learning_rate": 1.3516413095638928e-05, "loss": 0.932, "step": 1971 }, { "epoch": 0.40434693459093707, "grad_norm": 1.3616958856582642, "learning_rate": 1.3510194658209547e-05, "loss": 1.0168, "step": 1972 }, { "epoch": 0.4045519786754152, "grad_norm": 1.3217259645462036, "learning_rate": 1.3503974672289295e-05, "loss": 0.9187, "step": 1973 }, { "epoch": 0.4047570227598934, "grad_norm": 1.2543007135391235, "learning_rate": 1.3497753140622063e-05, "loss": 1.0072, "step": 1974 }, { "epoch": 0.4049620668443715, "grad_norm": 1.2200525999069214, "learning_rate": 1.349153006595243e-05, "loss": 0.8839, "step": 1975 }, { "epoch": 0.4051671109288497, "grad_norm": 1.224489688873291, "learning_rate": 1.3485305451025639e-05, "loss": 0.9108, "step": 1976 }, { "epoch": 0.40537215501332785, "grad_norm": 1.2305368185043335, "learning_rate": 1.3479079298587634e-05, "loss": 0.9436, "step": 1977 }, { "epoch": 0.40557719909780604, "grad_norm": 1.2733553647994995, "learning_rate": 1.3472851611385019e-05, "loss": 0.9583, "step": 1978 }, { "epoch": 0.4057822431822842, "grad_norm": 1.4426255226135254, "learning_rate": 1.3466622392165083e-05, "loss": 0.9515, "step": 1979 }, { "epoch": 0.40598728726676236, "grad_norm": 1.278005838394165, "learning_rate": 1.3460391643675794e-05, "loss": 0.9686, "step": 1980 }, { "epoch": 0.4061923313512405, "grad_norm": 1.2938700914382935, "learning_rate": 1.3454159368665786e-05, "loss": 0.9428, "step": 1981 }, { "epoch": 0.4063973754357187, "grad_norm": 1.2830404043197632, "learning_rate": 1.3447925569884374e-05, "loss": 0.9479, "step": 1982 }, { "epoch": 0.4066024195201968, "grad_norm": 1.361611247062683, "learning_rate": 1.3441690250081544e-05, "loss": 0.9892, "step": 1983 }, { "epoch": 0.406807463604675, "grad_norm": 1.1654868125915527, "learning_rate": 1.3435453412007949e-05, "loss": 0.9345, "step": 1984 }, { "epoch": 0.40701250768915315, "grad_norm": 1.2981793880462646, "learning_rate": 1.3429215058414913e-05, "loss": 0.9305, "step": 1985 }, { "epoch": 0.40721755177363134, "grad_norm": 1.4179637432098389, "learning_rate": 1.3422975192054433e-05, "loss": 1.0061, "step": 1986 }, { "epoch": 0.40742259585810947, "grad_norm": 1.150522232055664, "learning_rate": 1.3416733815679166e-05, "loss": 0.959, "step": 1987 }, { "epoch": 0.40762763994258766, "grad_norm": 1.2209819555282593, "learning_rate": 1.3410490932042443e-05, "loss": 0.8713, "step": 1988 }, { "epoch": 0.4078326840270658, "grad_norm": 1.2722630500793457, "learning_rate": 1.3404246543898257e-05, "loss": 0.9417, "step": 1989 }, { "epoch": 0.408037728111544, "grad_norm": 1.35453200340271, "learning_rate": 1.3398000654001255e-05, "loss": 0.931, "step": 1990 }, { "epoch": 0.4082427721960221, "grad_norm": 1.4035252332687378, "learning_rate": 1.3391753265106766e-05, "loss": 1.0012, "step": 1991 }, { "epoch": 0.4084478162805003, "grad_norm": 1.27195405960083, "learning_rate": 1.3385504379970764e-05, "loss": 0.969, "step": 1992 }, { "epoch": 0.40865286036497844, "grad_norm": 1.2418628931045532, "learning_rate": 1.3379254001349891e-05, "loss": 0.9474, "step": 1993 }, { "epoch": 0.40885790444945663, "grad_norm": 1.3122360706329346, "learning_rate": 1.3373002132001441e-05, "loss": 0.9439, "step": 1994 }, { "epoch": 0.4090629485339348, "grad_norm": 1.2832043170928955, "learning_rate": 1.3366748774683376e-05, "loss": 1.0259, "step": 1995 }, { "epoch": 0.40926799261841296, "grad_norm": 1.4200867414474487, "learning_rate": 1.3360493932154301e-05, "loss": 1.0502, "step": 1996 }, { "epoch": 0.40947303670289115, "grad_norm": 1.3023254871368408, "learning_rate": 1.3354237607173494e-05, "loss": 0.9291, "step": 1997 }, { "epoch": 0.4096780807873693, "grad_norm": 1.3132773637771606, "learning_rate": 1.3347979802500869e-05, "loss": 0.9637, "step": 1998 }, { "epoch": 0.40988312487184747, "grad_norm": 1.3185020685195923, "learning_rate": 1.3341720520897002e-05, "loss": 0.9822, "step": 1999 }, { "epoch": 0.4100881689563256, "grad_norm": 1.1975475549697876, "learning_rate": 1.3335459765123117e-05, "loss": 0.883, "step": 2000 }, { "epoch": 0.4102932130408038, "grad_norm": 1.2658559083938599, "learning_rate": 1.3329197537941093e-05, "loss": 0.9049, "step": 2001 }, { "epoch": 0.41049825712528193, "grad_norm": 1.212607502937317, "learning_rate": 1.3322933842113457e-05, "loss": 0.9581, "step": 2002 }, { "epoch": 0.4107033012097601, "grad_norm": 1.2419854402542114, "learning_rate": 1.3316668680403377e-05, "loss": 0.9231, "step": 2003 }, { "epoch": 0.41090834529423825, "grad_norm": 1.3973459005355835, "learning_rate": 1.331040205557468e-05, "loss": 0.9427, "step": 2004 }, { "epoch": 0.41111338937871644, "grad_norm": 1.4347774982452393, "learning_rate": 1.3304133970391826e-05, "loss": 1.0058, "step": 2005 }, { "epoch": 0.4113184334631946, "grad_norm": 1.4041903018951416, "learning_rate": 1.3297864427619925e-05, "loss": 0.951, "step": 2006 }, { "epoch": 0.41152347754767277, "grad_norm": 1.2029978036880493, "learning_rate": 1.3291593430024727e-05, "loss": 0.9414, "step": 2007 }, { "epoch": 0.4117285216321509, "grad_norm": 1.26981520652771, "learning_rate": 1.3285320980372634e-05, "loss": 1.0293, "step": 2008 }, { "epoch": 0.4119335657166291, "grad_norm": 1.2132817506790161, "learning_rate": 1.327904708143067e-05, "loss": 0.8997, "step": 2009 }, { "epoch": 0.4121386098011072, "grad_norm": 1.2365838289260864, "learning_rate": 1.3272771735966523e-05, "loss": 0.9174, "step": 2010 }, { "epoch": 0.4123436538855854, "grad_norm": 1.3403139114379883, "learning_rate": 1.3266494946748494e-05, "loss": 0.8354, "step": 2011 }, { "epoch": 0.41254869797006355, "grad_norm": 1.2411123514175415, "learning_rate": 1.3260216716545534e-05, "loss": 0.9293, "step": 2012 }, { "epoch": 0.41275374205454174, "grad_norm": 1.2735531330108643, "learning_rate": 1.3253937048127235e-05, "loss": 0.8597, "step": 2013 }, { "epoch": 0.4129587861390199, "grad_norm": 1.242503046989441, "learning_rate": 1.3247655944263807e-05, "loss": 0.9935, "step": 2014 }, { "epoch": 0.41316383022349806, "grad_norm": 1.147810935974121, "learning_rate": 1.3241373407726109e-05, "loss": 0.9601, "step": 2015 }, { "epoch": 0.4133688743079762, "grad_norm": 1.3358858823776245, "learning_rate": 1.323508944128562e-05, "loss": 1.0042, "step": 2016 }, { "epoch": 0.4135739183924544, "grad_norm": 1.3609365224838257, "learning_rate": 1.3228804047714462e-05, "loss": 1.0115, "step": 2017 }, { "epoch": 0.4137789624769325, "grad_norm": 1.316086769104004, "learning_rate": 1.3222517229785377e-05, "loss": 0.98, "step": 2018 }, { "epoch": 0.4139840065614107, "grad_norm": 1.355149507522583, "learning_rate": 1.321622899027174e-05, "loss": 0.8885, "step": 2019 }, { "epoch": 0.41418905064588885, "grad_norm": 1.2691209316253662, "learning_rate": 1.3209939331947545e-05, "loss": 0.9939, "step": 2020 }, { "epoch": 0.41439409473036704, "grad_norm": 1.2067055702209473, "learning_rate": 1.3203648257587427e-05, "loss": 0.9192, "step": 2021 }, { "epoch": 0.41459913881484517, "grad_norm": 1.478704571723938, "learning_rate": 1.319735576996663e-05, "loss": 0.9769, "step": 2022 }, { "epoch": 0.41480418289932336, "grad_norm": 1.3093018531799316, "learning_rate": 1.3191061871861033e-05, "loss": 0.8864, "step": 2023 }, { "epoch": 0.4150092269838015, "grad_norm": 1.3148784637451172, "learning_rate": 1.3184766566047131e-05, "loss": 0.9649, "step": 2024 }, { "epoch": 0.4152142710682797, "grad_norm": 1.3329700231552124, "learning_rate": 1.3178469855302042e-05, "loss": 0.996, "step": 2025 }, { "epoch": 0.4154193151527578, "grad_norm": 1.2328191995620728, "learning_rate": 1.3172171742403504e-05, "loss": 0.9346, "step": 2026 }, { "epoch": 0.415624359237236, "grad_norm": 1.2414798736572266, "learning_rate": 1.3165872230129869e-05, "loss": 0.8788, "step": 2027 }, { "epoch": 0.41582940332171414, "grad_norm": 1.2881338596343994, "learning_rate": 1.3159571321260114e-05, "loss": 0.97, "step": 2028 }, { "epoch": 0.41603444740619233, "grad_norm": 1.2928085327148438, "learning_rate": 1.3153269018573828e-05, "loss": 0.963, "step": 2029 }, { "epoch": 0.4162394914906705, "grad_norm": 1.2234294414520264, "learning_rate": 1.3146965324851215e-05, "loss": 0.9435, "step": 2030 }, { "epoch": 0.41644453557514866, "grad_norm": 1.2792755365371704, "learning_rate": 1.3140660242873093e-05, "loss": 0.9029, "step": 2031 }, { "epoch": 0.41664957965962685, "grad_norm": 1.2977018356323242, "learning_rate": 1.3134353775420895e-05, "loss": 1.0058, "step": 2032 }, { "epoch": 0.416854623744105, "grad_norm": 1.3505876064300537, "learning_rate": 1.3128045925276655e-05, "loss": 0.8893, "step": 2033 }, { "epoch": 0.41705966782858317, "grad_norm": 1.1744633913040161, "learning_rate": 1.3121736695223032e-05, "loss": 0.9842, "step": 2034 }, { "epoch": 0.4172647119130613, "grad_norm": 1.241326093673706, "learning_rate": 1.3115426088043284e-05, "loss": 0.9451, "step": 2035 }, { "epoch": 0.4174697559975395, "grad_norm": 1.2462495565414429, "learning_rate": 1.3109114106521277e-05, "loss": 0.951, "step": 2036 }, { "epoch": 0.4176748000820176, "grad_norm": 1.2187288999557495, "learning_rate": 1.3102800753441488e-05, "loss": 0.8854, "step": 2037 }, { "epoch": 0.4178798441664958, "grad_norm": 1.274042010307312, "learning_rate": 1.3096486031588993e-05, "loss": 0.8924, "step": 2038 }, { "epoch": 0.41808488825097395, "grad_norm": 1.342455506324768, "learning_rate": 1.3090169943749475e-05, "loss": 0.926, "step": 2039 }, { "epoch": 0.41828993233545214, "grad_norm": 1.3962559700012207, "learning_rate": 1.3083852492709223e-05, "loss": 0.9811, "step": 2040 }, { "epoch": 0.4184949764199303, "grad_norm": 1.2426050901412964, "learning_rate": 1.307753368125512e-05, "loss": 0.9374, "step": 2041 }, { "epoch": 0.41870002050440847, "grad_norm": 1.3201824426651, "learning_rate": 1.3071213512174655e-05, "loss": 0.9304, "step": 2042 }, { "epoch": 0.4189050645888866, "grad_norm": 1.443189263343811, "learning_rate": 1.3064891988255913e-05, "loss": 0.964, "step": 2043 }, { "epoch": 0.4191101086733648, "grad_norm": 1.2743784189224243, "learning_rate": 1.3058569112287577e-05, "loss": 0.989, "step": 2044 }, { "epoch": 0.4193151527578429, "grad_norm": 1.2460218667984009, "learning_rate": 1.3052244887058927e-05, "loss": 0.9292, "step": 2045 }, { "epoch": 0.4195201968423211, "grad_norm": 1.2891780138015747, "learning_rate": 1.3045919315359843e-05, "loss": 0.9597, "step": 2046 }, { "epoch": 0.41972524092679925, "grad_norm": 1.3233346939086914, "learning_rate": 1.3039592399980785e-05, "loss": 0.8759, "step": 2047 }, { "epoch": 0.41993028501127744, "grad_norm": 1.2963885068893433, "learning_rate": 1.303326414371282e-05, "loss": 0.9641, "step": 2048 }, { "epoch": 0.42013532909575557, "grad_norm": 1.3123456239700317, "learning_rate": 1.3026934549347608e-05, "loss": 0.9656, "step": 2049 }, { "epoch": 0.42034037318023376, "grad_norm": 1.2872518301010132, "learning_rate": 1.3020603619677378e-05, "loss": 0.9235, "step": 2050 }, { "epoch": 0.4205454172647119, "grad_norm": 1.333008885383606, "learning_rate": 1.301427135749498e-05, "loss": 0.9062, "step": 2051 }, { "epoch": 0.4207504613491901, "grad_norm": 1.2146364450454712, "learning_rate": 1.3007937765593818e-05, "loss": 0.9779, "step": 2052 }, { "epoch": 0.4209555054336682, "grad_norm": 1.304964303970337, "learning_rate": 1.300160284676791e-05, "loss": 0.9832, "step": 2053 }, { "epoch": 0.4211605495181464, "grad_norm": 1.2901372909545898, "learning_rate": 1.2995266603811847e-05, "loss": 0.9415, "step": 2054 }, { "epoch": 0.42136559360262454, "grad_norm": 1.2498575448989868, "learning_rate": 1.2988929039520804e-05, "loss": 0.9432, "step": 2055 }, { "epoch": 0.42157063768710273, "grad_norm": 1.2925161123275757, "learning_rate": 1.298259015669054e-05, "loss": 0.9765, "step": 2056 }, { "epoch": 0.42177568177158087, "grad_norm": 1.2827202081680298, "learning_rate": 1.2976249958117395e-05, "loss": 0.955, "step": 2057 }, { "epoch": 0.42198072585605906, "grad_norm": 1.2293387651443481, "learning_rate": 1.2969908446598295e-05, "loss": 0.9922, "step": 2058 }, { "epoch": 0.4221857699405372, "grad_norm": 1.2721004486083984, "learning_rate": 1.2963565624930739e-05, "loss": 0.9728, "step": 2059 }, { "epoch": 0.4223908140250154, "grad_norm": 1.2600921392440796, "learning_rate": 1.2957221495912804e-05, "loss": 0.8872, "step": 2060 }, { "epoch": 0.4225958581094935, "grad_norm": 1.3756204843521118, "learning_rate": 1.2950876062343147e-05, "loss": 0.8603, "step": 2061 }, { "epoch": 0.4228009021939717, "grad_norm": 1.1304603815078735, "learning_rate": 1.2944529327021002e-05, "loss": 0.9714, "step": 2062 }, { "epoch": 0.42300594627844984, "grad_norm": 1.4137669801712036, "learning_rate": 1.2938181292746168e-05, "loss": 0.9204, "step": 2063 }, { "epoch": 0.42321099036292803, "grad_norm": 1.4239956140518188, "learning_rate": 1.2931831962319027e-05, "loss": 0.9343, "step": 2064 }, { "epoch": 0.4234160344474062, "grad_norm": 1.4117389917373657, "learning_rate": 1.2925481338540532e-05, "loss": 0.9169, "step": 2065 }, { "epoch": 0.42362107853188435, "grad_norm": 1.3357900381088257, "learning_rate": 1.2919129424212198e-05, "loss": 0.9876, "step": 2066 }, { "epoch": 0.42382612261636254, "grad_norm": 1.3489292860031128, "learning_rate": 1.291277622213612e-05, "loss": 0.9294, "step": 2067 }, { "epoch": 0.4240311667008407, "grad_norm": 1.246017336845398, "learning_rate": 1.2906421735114951e-05, "loss": 0.9677, "step": 2068 }, { "epoch": 0.42423621078531887, "grad_norm": 1.2862412929534912, "learning_rate": 1.2900065965951922e-05, "loss": 0.9362, "step": 2069 }, { "epoch": 0.424441254869797, "grad_norm": 1.3013298511505127, "learning_rate": 1.289370891745082e-05, "loss": 0.8911, "step": 2070 }, { "epoch": 0.4246462989542752, "grad_norm": 1.2339649200439453, "learning_rate": 1.2887350592415996e-05, "loss": 0.9355, "step": 2071 }, { "epoch": 0.4248513430387533, "grad_norm": 1.2856383323669434, "learning_rate": 1.2880990993652379e-05, "loss": 0.9449, "step": 2072 }, { "epoch": 0.4250563871232315, "grad_norm": 1.1992894411087036, "learning_rate": 1.2874630123965438e-05, "loss": 0.8687, "step": 2073 }, { "epoch": 0.42526143120770965, "grad_norm": 1.3797056674957275, "learning_rate": 1.2868267986161216e-05, "loss": 0.934, "step": 2074 }, { "epoch": 0.42546647529218784, "grad_norm": 1.310847520828247, "learning_rate": 1.2861904583046316e-05, "loss": 0.8828, "step": 2075 }, { "epoch": 0.425671519376666, "grad_norm": 1.18584144115448, "learning_rate": 1.2855539917427895e-05, "loss": 0.9165, "step": 2076 }, { "epoch": 0.42587656346114416, "grad_norm": 1.4504402875900269, "learning_rate": 1.2849173992113669e-05, "loss": 0.8947, "step": 2077 }, { "epoch": 0.4260816075456223, "grad_norm": 1.2662636041641235, "learning_rate": 1.2842806809911904e-05, "loss": 0.9119, "step": 2078 }, { "epoch": 0.4262866516301005, "grad_norm": 1.5100280046463013, "learning_rate": 1.2836438373631432e-05, "loss": 0.8912, "step": 2079 }, { "epoch": 0.4264916957145786, "grad_norm": 1.2079657316207886, "learning_rate": 1.2830068686081629e-05, "loss": 0.9942, "step": 2080 }, { "epoch": 0.4266967397990568, "grad_norm": 1.2589783668518066, "learning_rate": 1.2823697750072424e-05, "loss": 0.9819, "step": 2081 }, { "epoch": 0.42690178388353495, "grad_norm": 1.3818743228912354, "learning_rate": 1.2817325568414299e-05, "loss": 0.9163, "step": 2082 }, { "epoch": 0.42710682796801314, "grad_norm": 1.2502152919769287, "learning_rate": 1.2810952143918284e-05, "loss": 0.9861, "step": 2083 }, { "epoch": 0.42731187205249127, "grad_norm": 1.337384581565857, "learning_rate": 1.2804577479395959e-05, "loss": 0.9466, "step": 2084 }, { "epoch": 0.42751691613696946, "grad_norm": 1.2355762720108032, "learning_rate": 1.2798201577659453e-05, "loss": 0.8767, "step": 2085 }, { "epoch": 0.4277219602214476, "grad_norm": 1.283914566040039, "learning_rate": 1.2791824441521434e-05, "loss": 0.9699, "step": 2086 }, { "epoch": 0.4279270043059258, "grad_norm": 1.267960548400879, "learning_rate": 1.2785446073795118e-05, "loss": 0.93, "step": 2087 }, { "epoch": 0.4281320483904039, "grad_norm": 1.2826616764068604, "learning_rate": 1.2779066477294266e-05, "loss": 0.9737, "step": 2088 }, { "epoch": 0.4283370924748821, "grad_norm": 1.1683244705200195, "learning_rate": 1.2772685654833182e-05, "loss": 0.9261, "step": 2089 }, { "epoch": 0.42854213655936024, "grad_norm": 1.2516214847564697, "learning_rate": 1.2766303609226702e-05, "loss": 0.8946, "step": 2090 }, { "epoch": 0.42874718064383843, "grad_norm": 1.1973334550857544, "learning_rate": 1.2759920343290215e-05, "loss": 0.9515, "step": 2091 }, { "epoch": 0.42895222472831657, "grad_norm": 1.2982745170593262, "learning_rate": 1.2753535859839638e-05, "loss": 0.9653, "step": 2092 }, { "epoch": 0.42915726881279476, "grad_norm": 1.3907631635665894, "learning_rate": 1.2747150161691428e-05, "loss": 0.9018, "step": 2093 }, { "epoch": 0.4293623128972729, "grad_norm": 1.3286476135253906, "learning_rate": 1.2740763251662585e-05, "loss": 0.9301, "step": 2094 }, { "epoch": 0.4295673569817511, "grad_norm": 1.235525131225586, "learning_rate": 1.2734375132570627e-05, "loss": 0.8607, "step": 2095 }, { "epoch": 0.4297724010662292, "grad_norm": 1.2332781553268433, "learning_rate": 1.2727985807233623e-05, "loss": 0.905, "step": 2096 }, { "epoch": 0.4299774451507074, "grad_norm": 1.2386506795883179, "learning_rate": 1.272159527847016e-05, "loss": 0.9334, "step": 2097 }, { "epoch": 0.43018248923518554, "grad_norm": 1.285266399383545, "learning_rate": 1.271520354909937e-05, "loss": 0.9561, "step": 2098 }, { "epoch": 0.43038753331966373, "grad_norm": 1.245800495147705, "learning_rate": 1.27088106219409e-05, "loss": 0.9387, "step": 2099 }, { "epoch": 0.43059257740414186, "grad_norm": 1.2779017686843872, "learning_rate": 1.2702416499814938e-05, "loss": 0.8848, "step": 2100 }, { "epoch": 0.43079762148862005, "grad_norm": 1.1886284351348877, "learning_rate": 1.2696021185542192e-05, "loss": 0.864, "step": 2101 }, { "epoch": 0.43100266557309824, "grad_norm": 1.3204153776168823, "learning_rate": 1.2689624681943897e-05, "loss": 0.935, "step": 2102 }, { "epoch": 0.4312077096575764, "grad_norm": 1.251814365386963, "learning_rate": 1.2683226991841812e-05, "loss": 0.9305, "step": 2103 }, { "epoch": 0.43141275374205457, "grad_norm": 1.1794066429138184, "learning_rate": 1.267682811805822e-05, "loss": 0.9772, "step": 2104 }, { "epoch": 0.4316177978265327, "grad_norm": 1.1935679912567139, "learning_rate": 1.2670428063415932e-05, "loss": 0.9387, "step": 2105 }, { "epoch": 0.4318228419110109, "grad_norm": 1.3482578992843628, "learning_rate": 1.2664026830738267e-05, "loss": 1.0544, "step": 2106 }, { "epoch": 0.432027885995489, "grad_norm": 1.2081375122070312, "learning_rate": 1.2657624422849077e-05, "loss": 0.893, "step": 2107 }, { "epoch": 0.4322329300799672, "grad_norm": 1.192569613456726, "learning_rate": 1.2651220842572724e-05, "loss": 0.9443, "step": 2108 }, { "epoch": 0.43243797416444535, "grad_norm": 1.5941976308822632, "learning_rate": 1.2644816092734092e-05, "loss": 0.9501, "step": 2109 }, { "epoch": 0.43264301824892354, "grad_norm": 1.2809563875198364, "learning_rate": 1.2638410176158577e-05, "loss": 0.9214, "step": 2110 }, { "epoch": 0.4328480623334017, "grad_norm": 1.2845559120178223, "learning_rate": 1.2632003095672092e-05, "loss": 0.918, "step": 2111 }, { "epoch": 0.43305310641787986, "grad_norm": 1.2910913228988647, "learning_rate": 1.2625594854101066e-05, "loss": 0.9535, "step": 2112 }, { "epoch": 0.433258150502358, "grad_norm": 1.3809291124343872, "learning_rate": 1.2619185454272431e-05, "loss": 0.9563, "step": 2113 }, { "epoch": 0.4334631945868362, "grad_norm": 1.36131751537323, "learning_rate": 1.2612774899013644e-05, "loss": 0.8969, "step": 2114 }, { "epoch": 0.4336682386713143, "grad_norm": 1.0982556343078613, "learning_rate": 1.2606363191152657e-05, "loss": 0.9311, "step": 2115 }, { "epoch": 0.4338732827557925, "grad_norm": 1.3871488571166992, "learning_rate": 1.2599950333517943e-05, "loss": 0.9211, "step": 2116 }, { "epoch": 0.43407832684027065, "grad_norm": 1.1464545726776123, "learning_rate": 1.2593536328938471e-05, "loss": 0.8953, "step": 2117 }, { "epoch": 0.43428337092474883, "grad_norm": 1.2260444164276123, "learning_rate": 1.2587121180243728e-05, "loss": 0.9459, "step": 2118 }, { "epoch": 0.43448841500922697, "grad_norm": 1.1736198663711548, "learning_rate": 1.2580704890263695e-05, "loss": 0.9488, "step": 2119 }, { "epoch": 0.43469345909370516, "grad_norm": 1.167196273803711, "learning_rate": 1.257428746182886e-05, "loss": 0.9347, "step": 2120 }, { "epoch": 0.4348985031781833, "grad_norm": 1.2572448253631592, "learning_rate": 1.2567868897770217e-05, "loss": 1.0498, "step": 2121 }, { "epoch": 0.4351035472626615, "grad_norm": 1.384202480316162, "learning_rate": 1.2561449200919253e-05, "loss": 0.9856, "step": 2122 }, { "epoch": 0.4353085913471396, "grad_norm": 1.3043608665466309, "learning_rate": 1.2555028374107967e-05, "loss": 0.9046, "step": 2123 }, { "epoch": 0.4355136354316178, "grad_norm": 1.1506309509277344, "learning_rate": 1.2548606420168842e-05, "loss": 0.9163, "step": 2124 }, { "epoch": 0.43571867951609594, "grad_norm": 1.3649702072143555, "learning_rate": 1.2542183341934873e-05, "loss": 0.9369, "step": 2125 }, { "epoch": 0.43592372360057413, "grad_norm": 1.2657591104507446, "learning_rate": 1.2535759142239535e-05, "loss": 1.0164, "step": 2126 }, { "epoch": 0.43612876768505227, "grad_norm": 1.28702712059021, "learning_rate": 1.2529333823916807e-05, "loss": 0.9476, "step": 2127 }, { "epoch": 0.43633381176953046, "grad_norm": 1.2295769453048706, "learning_rate": 1.2522907389801168e-05, "loss": 0.968, "step": 2128 }, { "epoch": 0.4365388558540086, "grad_norm": 1.3246909379959106, "learning_rate": 1.2516479842727576e-05, "loss": 0.969, "step": 2129 }, { "epoch": 0.4367438999384868, "grad_norm": 1.2764945030212402, "learning_rate": 1.251005118553149e-05, "loss": 0.9229, "step": 2130 }, { "epoch": 0.4369489440229649, "grad_norm": 1.2423577308654785, "learning_rate": 1.2503621421048844e-05, "loss": 0.985, "step": 2131 }, { "epoch": 0.4371539881074431, "grad_norm": 1.2272076606750488, "learning_rate": 1.2497190552116082e-05, "loss": 0.9419, "step": 2132 }, { "epoch": 0.43735903219192124, "grad_norm": 1.2774066925048828, "learning_rate": 1.2490758581570122e-05, "loss": 0.9207, "step": 2133 }, { "epoch": 0.4375640762763994, "grad_norm": 1.268904209136963, "learning_rate": 1.2484325512248366e-05, "loss": 0.9705, "step": 2134 }, { "epoch": 0.43776912036087756, "grad_norm": 1.2201067209243774, "learning_rate": 1.2477891346988709e-05, "loss": 0.9197, "step": 2135 }, { "epoch": 0.43797416444535575, "grad_norm": 1.3025321960449219, "learning_rate": 1.2471456088629522e-05, "loss": 0.903, "step": 2136 }, { "epoch": 0.43817920852983394, "grad_norm": 1.174176812171936, "learning_rate": 1.2465019740009662e-05, "loss": 0.8373, "step": 2137 }, { "epoch": 0.4383842526143121, "grad_norm": 1.2236599922180176, "learning_rate": 1.2458582303968466e-05, "loss": 0.9583, "step": 2138 }, { "epoch": 0.43858929669879027, "grad_norm": 1.3510493040084839, "learning_rate": 1.2452143783345756e-05, "loss": 0.94, "step": 2139 }, { "epoch": 0.4387943407832684, "grad_norm": 1.4092227220535278, "learning_rate": 1.2445704180981821e-05, "loss": 0.9665, "step": 2140 }, { "epoch": 0.4389993848677466, "grad_norm": 1.3063156604766846, "learning_rate": 1.243926349971744e-05, "loss": 0.9769, "step": 2141 }, { "epoch": 0.4392044289522247, "grad_norm": 1.279961109161377, "learning_rate": 1.2432821742393854e-05, "loss": 0.9657, "step": 2142 }, { "epoch": 0.4394094730367029, "grad_norm": 1.2540613412857056, "learning_rate": 1.2426378911852794e-05, "loss": 0.8702, "step": 2143 }, { "epoch": 0.43961451712118105, "grad_norm": 1.2869551181793213, "learning_rate": 1.2419935010936454e-05, "loss": 0.9421, "step": 2144 }, { "epoch": 0.43981956120565924, "grad_norm": 1.3655623197555542, "learning_rate": 1.2413490042487501e-05, "loss": 0.9778, "step": 2145 }, { "epoch": 0.44002460529013737, "grad_norm": 1.3045989274978638, "learning_rate": 1.240704400934908e-05, "loss": 0.885, "step": 2146 }, { "epoch": 0.44022964937461556, "grad_norm": 1.152719497680664, "learning_rate": 1.2400596914364792e-05, "loss": 0.8637, "step": 2147 }, { "epoch": 0.4404346934590937, "grad_norm": 1.189630389213562, "learning_rate": 1.2394148760378726e-05, "loss": 0.9367, "step": 2148 }, { "epoch": 0.4406397375435719, "grad_norm": 1.1538126468658447, "learning_rate": 1.2387699550235419e-05, "loss": 0.8805, "step": 2149 }, { "epoch": 0.44084478162805, "grad_norm": 1.2972817420959473, "learning_rate": 1.2381249286779889e-05, "loss": 0.893, "step": 2150 }, { "epoch": 0.4410498257125282, "grad_norm": 1.170898199081421, "learning_rate": 1.2374797972857603e-05, "loss": 0.9412, "step": 2151 }, { "epoch": 0.44125486979700634, "grad_norm": 1.213096261024475, "learning_rate": 1.2368345611314508e-05, "loss": 0.9305, "step": 2152 }, { "epoch": 0.44145991388148453, "grad_norm": 1.3289973735809326, "learning_rate": 1.2361892204997002e-05, "loss": 0.9926, "step": 2153 }, { "epoch": 0.44166495796596267, "grad_norm": 1.239643931388855, "learning_rate": 1.2355437756751944e-05, "loss": 1.0039, "step": 2154 }, { "epoch": 0.44187000205044086, "grad_norm": 1.3061137199401855, "learning_rate": 1.2348982269426666e-05, "loss": 0.9197, "step": 2155 }, { "epoch": 0.442075046134919, "grad_norm": 1.2053015232086182, "learning_rate": 1.2342525745868937e-05, "loss": 0.9821, "step": 2156 }, { "epoch": 0.4422800902193972, "grad_norm": 1.1720904111862183, "learning_rate": 1.2336068188927002e-05, "loss": 0.924, "step": 2157 }, { "epoch": 0.4424851343038753, "grad_norm": 1.2194730043411255, "learning_rate": 1.232960960144955e-05, "loss": 0.9042, "step": 2158 }, { "epoch": 0.4426901783883535, "grad_norm": 1.324665904045105, "learning_rate": 1.2323149986285731e-05, "loss": 0.9629, "step": 2159 }, { "epoch": 0.44289522247283164, "grad_norm": 1.2324081659317017, "learning_rate": 1.2316689346285146e-05, "loss": 0.9688, "step": 2160 }, { "epoch": 0.44310026655730983, "grad_norm": 1.3993943929672241, "learning_rate": 1.2310227684297847e-05, "loss": 0.9966, "step": 2161 }, { "epoch": 0.44330531064178796, "grad_norm": 1.3659909963607788, "learning_rate": 1.2303765003174342e-05, "loss": 0.8951, "step": 2162 }, { "epoch": 0.44351035472626615, "grad_norm": 1.3646057844161987, "learning_rate": 1.2297301305765578e-05, "loss": 0.9307, "step": 2163 }, { "epoch": 0.4437153988107443, "grad_norm": 1.3499672412872314, "learning_rate": 1.2290836594922967e-05, "loss": 0.9307, "step": 2164 }, { "epoch": 0.4439204428952225, "grad_norm": 1.29721999168396, "learning_rate": 1.2284370873498348e-05, "loss": 0.9322, "step": 2165 }, { "epoch": 0.4441254869797006, "grad_norm": 1.2494593858718872, "learning_rate": 1.2277904144344022e-05, "loss": 0.883, "step": 2166 }, { "epoch": 0.4443305310641788, "grad_norm": 1.2439275979995728, "learning_rate": 1.2271436410312727e-05, "loss": 0.9547, "step": 2167 }, { "epoch": 0.44453557514865694, "grad_norm": 1.2922372817993164, "learning_rate": 1.2264967674257647e-05, "loss": 0.9351, "step": 2168 }, { "epoch": 0.4447406192331351, "grad_norm": 1.2410861253738403, "learning_rate": 1.2258497939032404e-05, "loss": 0.8853, "step": 2169 }, { "epoch": 0.44494566331761326, "grad_norm": 1.257621169090271, "learning_rate": 1.2252027207491062e-05, "loss": 0.9262, "step": 2170 }, { "epoch": 0.44515070740209145, "grad_norm": 1.2360305786132812, "learning_rate": 1.2245555482488134e-05, "loss": 0.9618, "step": 2171 }, { "epoch": 0.44535575148656964, "grad_norm": 1.2501929998397827, "learning_rate": 1.2239082766878557e-05, "loss": 0.9319, "step": 2172 }, { "epoch": 0.4455607955710478, "grad_norm": 1.1174309253692627, "learning_rate": 1.2232609063517713e-05, "loss": 0.8746, "step": 2173 }, { "epoch": 0.44576583965552596, "grad_norm": 1.2968558073043823, "learning_rate": 1.2226134375261418e-05, "loss": 0.9401, "step": 2174 }, { "epoch": 0.4459708837400041, "grad_norm": 1.2523632049560547, "learning_rate": 1.2219658704965926e-05, "loss": 0.9421, "step": 2175 }, { "epoch": 0.4461759278244823, "grad_norm": 1.1809699535369873, "learning_rate": 1.2213182055487913e-05, "loss": 0.9338, "step": 2176 }, { "epoch": 0.4463809719089604, "grad_norm": 1.3031485080718994, "learning_rate": 1.2206704429684504e-05, "loss": 0.9781, "step": 2177 }, { "epoch": 0.4465860159934386, "grad_norm": 1.2083632946014404, "learning_rate": 1.2200225830413236e-05, "loss": 0.9533, "step": 2178 }, { "epoch": 0.44679106007791675, "grad_norm": 1.1591615676879883, "learning_rate": 1.2193746260532094e-05, "loss": 0.9383, "step": 2179 }, { "epoch": 0.44699610416239494, "grad_norm": 1.262593388557434, "learning_rate": 1.2187265722899478e-05, "loss": 0.9407, "step": 2180 }, { "epoch": 0.44720114824687307, "grad_norm": 1.2642772197723389, "learning_rate": 1.2180784220374215e-05, "loss": 0.9174, "step": 2181 }, { "epoch": 0.44740619233135126, "grad_norm": 1.1711174249649048, "learning_rate": 1.2174301755815572e-05, "loss": 0.86, "step": 2182 }, { "epoch": 0.4476112364158294, "grad_norm": 1.1363111734390259, "learning_rate": 1.216781833208322e-05, "loss": 0.9115, "step": 2183 }, { "epoch": 0.4478162805003076, "grad_norm": 1.263528823852539, "learning_rate": 1.2161333952037269e-05, "loss": 0.9559, "step": 2184 }, { "epoch": 0.4480213245847857, "grad_norm": 1.3084056377410889, "learning_rate": 1.215484861853824e-05, "loss": 0.9486, "step": 2185 }, { "epoch": 0.4482263686692639, "grad_norm": 1.2066763639450073, "learning_rate": 1.2148362334447086e-05, "loss": 0.9284, "step": 2186 }, { "epoch": 0.44843141275374204, "grad_norm": 1.1614885330200195, "learning_rate": 1.2141875102625166e-05, "loss": 0.9063, "step": 2187 }, { "epoch": 0.44863645683822023, "grad_norm": 1.2328031063079834, "learning_rate": 1.2135386925934269e-05, "loss": 0.9772, "step": 2188 }, { "epoch": 0.44884150092269837, "grad_norm": 1.2474864721298218, "learning_rate": 1.2128897807236595e-05, "loss": 0.9876, "step": 2189 }, { "epoch": 0.44904654500717656, "grad_norm": 1.2973576784133911, "learning_rate": 1.212240774939476e-05, "loss": 0.9413, "step": 2190 }, { "epoch": 0.4492515890916547, "grad_norm": 1.1757304668426514, "learning_rate": 1.2115916755271797e-05, "loss": 0.8805, "step": 2191 }, { "epoch": 0.4494566331761329, "grad_norm": 1.3490016460418701, "learning_rate": 1.2109424827731144e-05, "loss": 0.996, "step": 2192 }, { "epoch": 0.449661677260611, "grad_norm": 1.3963443040847778, "learning_rate": 1.2102931969636664e-05, "loss": 0.9013, "step": 2193 }, { "epoch": 0.4498667213450892, "grad_norm": 1.2561250925064087, "learning_rate": 1.2096438183852617e-05, "loss": 0.9051, "step": 2194 }, { "epoch": 0.45007176542956734, "grad_norm": 1.355115532875061, "learning_rate": 1.2089943473243684e-05, "loss": 0.9663, "step": 2195 }, { "epoch": 0.45027680951404553, "grad_norm": 1.1660884618759155, "learning_rate": 1.208344784067494e-05, "loss": 0.9316, "step": 2196 }, { "epoch": 0.45048185359852366, "grad_norm": 1.3949223756790161, "learning_rate": 1.2076951289011884e-05, "loss": 0.9209, "step": 2197 }, { "epoch": 0.45068689768300185, "grad_norm": 1.3987449407577515, "learning_rate": 1.2070453821120404e-05, "loss": 0.9376, "step": 2198 }, { "epoch": 0.45089194176748, "grad_norm": 1.1862026453018188, "learning_rate": 1.2063955439866808e-05, "loss": 0.9147, "step": 2199 }, { "epoch": 0.4510969858519582, "grad_norm": 1.2755719423294067, "learning_rate": 1.2057456148117793e-05, "loss": 0.9874, "step": 2200 }, { "epoch": 0.4513020299364363, "grad_norm": 1.3418443202972412, "learning_rate": 1.205095594874046e-05, "loss": 0.9696, "step": 2201 }, { "epoch": 0.4515070740209145, "grad_norm": 1.1621017456054688, "learning_rate": 1.204445484460232e-05, "loss": 0.9216, "step": 2202 }, { "epoch": 0.45171211810539263, "grad_norm": 1.217706322669983, "learning_rate": 1.2037952838571272e-05, "loss": 0.9594, "step": 2203 }, { "epoch": 0.4519171621898708, "grad_norm": 1.36458158493042, "learning_rate": 1.2031449933515625e-05, "loss": 0.9065, "step": 2204 }, { "epoch": 0.45212220627434896, "grad_norm": 1.2592272758483887, "learning_rate": 1.2024946132304068e-05, "loss": 0.8951, "step": 2205 }, { "epoch": 0.45232725035882715, "grad_norm": 1.3092938661575317, "learning_rate": 1.2018441437805696e-05, "loss": 0.871, "step": 2206 }, { "epoch": 0.4525322944433053, "grad_norm": 1.3877543210983276, "learning_rate": 1.2011935852890004e-05, "loss": 0.9663, "step": 2207 }, { "epoch": 0.4527373385277835, "grad_norm": 1.2048581838607788, "learning_rate": 1.2005429380426865e-05, "loss": 0.9652, "step": 2208 }, { "epoch": 0.45294238261226166, "grad_norm": 1.3640835285186768, "learning_rate": 1.1998922023286557e-05, "loss": 1.0273, "step": 2209 }, { "epoch": 0.4531474266967398, "grad_norm": 1.4333652257919312, "learning_rate": 1.1992413784339734e-05, "loss": 0.9399, "step": 2210 }, { "epoch": 0.453352470781218, "grad_norm": 1.2663419246673584, "learning_rate": 1.1985904666457455e-05, "loss": 0.9052, "step": 2211 }, { "epoch": 0.4535575148656961, "grad_norm": 1.230979084968567, "learning_rate": 1.1979394672511156e-05, "loss": 0.9502, "step": 2212 }, { "epoch": 0.4537625589501743, "grad_norm": 1.356291651725769, "learning_rate": 1.1972883805372662e-05, "loss": 0.9223, "step": 2213 }, { "epoch": 0.45396760303465244, "grad_norm": 1.2698345184326172, "learning_rate": 1.1966372067914183e-05, "loss": 0.8909, "step": 2214 }, { "epoch": 0.45417264711913063, "grad_norm": 1.1475343704223633, "learning_rate": 1.1959859463008316e-05, "loss": 0.887, "step": 2215 }, { "epoch": 0.45437769120360877, "grad_norm": 1.246532917022705, "learning_rate": 1.1953345993528037e-05, "loss": 0.8509, "step": 2216 }, { "epoch": 0.45458273528808696, "grad_norm": 1.230125069618225, "learning_rate": 1.19468316623467e-05, "loss": 1.0014, "step": 2217 }, { "epoch": 0.4547877793725651, "grad_norm": 1.1989426612854004, "learning_rate": 1.1940316472338054e-05, "loss": 0.9951, "step": 2218 }, { "epoch": 0.4549928234570433, "grad_norm": 1.4807013273239136, "learning_rate": 1.1933800426376205e-05, "loss": 0.9833, "step": 2219 }, { "epoch": 0.4551978675415214, "grad_norm": 1.271411418914795, "learning_rate": 1.1927283527335657e-05, "loss": 0.9148, "step": 2220 }, { "epoch": 0.4554029116259996, "grad_norm": 1.2177817821502686, "learning_rate": 1.1920765778091276e-05, "loss": 0.8985, "step": 2221 }, { "epoch": 0.45560795571047774, "grad_norm": 1.3429381847381592, "learning_rate": 1.1914247181518312e-05, "loss": 0.9968, "step": 2222 }, { "epoch": 0.45581299979495593, "grad_norm": 1.3501895666122437, "learning_rate": 1.1907727740492386e-05, "loss": 0.9769, "step": 2223 }, { "epoch": 0.45601804387943407, "grad_norm": 1.3550963401794434, "learning_rate": 1.1901207457889485e-05, "loss": 0.8858, "step": 2224 }, { "epoch": 0.45622308796391225, "grad_norm": 1.2969087362289429, "learning_rate": 1.1894686336585982e-05, "loss": 0.9466, "step": 2225 }, { "epoch": 0.4564281320483904, "grad_norm": 1.3351205587387085, "learning_rate": 1.1888164379458603e-05, "loss": 0.9277, "step": 2226 }, { "epoch": 0.4566331761328686, "grad_norm": 1.1322940587997437, "learning_rate": 1.1881641589384456e-05, "loss": 0.9387, "step": 2227 }, { "epoch": 0.4568382202173467, "grad_norm": 1.2266242504119873, "learning_rate": 1.1875117969241008e-05, "loss": 0.9249, "step": 2228 }, { "epoch": 0.4570432643018249, "grad_norm": 1.332154393196106, "learning_rate": 1.18685935219061e-05, "loss": 0.9627, "step": 2229 }, { "epoch": 0.45724830838630304, "grad_norm": 1.2706549167633057, "learning_rate": 1.1862068250257928e-05, "loss": 0.9296, "step": 2230 }, { "epoch": 0.4574533524707812, "grad_norm": 1.2433854341506958, "learning_rate": 1.1855542157175061e-05, "loss": 0.943, "step": 2231 }, { "epoch": 0.45765839655525936, "grad_norm": 1.242612361907959, "learning_rate": 1.1849015245536424e-05, "loss": 0.9316, "step": 2232 }, { "epoch": 0.45786344063973755, "grad_norm": 1.1537002325057983, "learning_rate": 1.1842487518221306e-05, "loss": 0.9679, "step": 2233 }, { "epoch": 0.4580684847242157, "grad_norm": 1.2913169860839844, "learning_rate": 1.1835958978109356e-05, "loss": 0.9999, "step": 2234 }, { "epoch": 0.4582735288086939, "grad_norm": 1.207682728767395, "learning_rate": 1.182942962808058e-05, "loss": 0.9186, "step": 2235 }, { "epoch": 0.458478572893172, "grad_norm": 1.303304672241211, "learning_rate": 1.1822899471015346e-05, "loss": 0.9379, "step": 2236 }, { "epoch": 0.4586836169776502, "grad_norm": 1.1896532773971558, "learning_rate": 1.1816368509794365e-05, "loss": 0.9028, "step": 2237 }, { "epoch": 0.45888866106212833, "grad_norm": 1.3209627866744995, "learning_rate": 1.1809836747298723e-05, "loss": 0.9424, "step": 2238 }, { "epoch": 0.4590937051466065, "grad_norm": 1.2489677667617798, "learning_rate": 1.180330418640984e-05, "loss": 0.9735, "step": 2239 }, { "epoch": 0.45929874923108466, "grad_norm": 1.2431527376174927, "learning_rate": 1.1796770830009501e-05, "loss": 0.9541, "step": 2240 }, { "epoch": 0.45950379331556285, "grad_norm": 1.1349185705184937, "learning_rate": 1.1790236680979837e-05, "loss": 0.9482, "step": 2241 }, { "epoch": 0.459708837400041, "grad_norm": 1.1632579565048218, "learning_rate": 1.1783701742203326e-05, "loss": 0.9353, "step": 2242 }, { "epoch": 0.45991388148451917, "grad_norm": 1.2807122468948364, "learning_rate": 1.1777166016562803e-05, "loss": 0.9203, "step": 2243 }, { "epoch": 0.46011892556899736, "grad_norm": 1.260909914970398, "learning_rate": 1.177062950694144e-05, "loss": 0.9684, "step": 2244 }, { "epoch": 0.4603239696534755, "grad_norm": 1.2738577127456665, "learning_rate": 1.1764092216222762e-05, "loss": 0.9924, "step": 2245 }, { "epoch": 0.4605290137379537, "grad_norm": 1.229862928390503, "learning_rate": 1.1757554147290635e-05, "loss": 0.9214, "step": 2246 }, { "epoch": 0.4607340578224318, "grad_norm": 1.2269328832626343, "learning_rate": 1.1751015303029272e-05, "loss": 0.8856, "step": 2247 }, { "epoch": 0.46093910190691, "grad_norm": 1.1567474603652954, "learning_rate": 1.1744475686323225e-05, "loss": 0.8794, "step": 2248 }, { "epoch": 0.46114414599138814, "grad_norm": 1.2302711009979248, "learning_rate": 1.1737935300057378e-05, "loss": 0.8956, "step": 2249 }, { "epoch": 0.46134919007586633, "grad_norm": 1.1855483055114746, "learning_rate": 1.173139414711698e-05, "loss": 0.803, "step": 2250 }, { "epoch": 0.46155423416034447, "grad_norm": 1.2059268951416016, "learning_rate": 1.1724852230387587e-05, "loss": 0.9547, "step": 2251 }, { "epoch": 0.46175927824482266, "grad_norm": 1.2675702571868896, "learning_rate": 1.1718309552755118e-05, "loss": 0.8957, "step": 2252 }, { "epoch": 0.4619643223293008, "grad_norm": 1.1911017894744873, "learning_rate": 1.1711766117105807e-05, "loss": 0.8923, "step": 2253 }, { "epoch": 0.462169366413779, "grad_norm": 1.3014674186706543, "learning_rate": 1.170522192632624e-05, "loss": 0.9102, "step": 2254 }, { "epoch": 0.4623744104982571, "grad_norm": 1.3901101350784302, "learning_rate": 1.1698676983303323e-05, "loss": 1.0004, "step": 2255 }, { "epoch": 0.4625794545827353, "grad_norm": 1.2618812322616577, "learning_rate": 1.16921312909243e-05, "loss": 0.8758, "step": 2256 }, { "epoch": 0.46278449866721344, "grad_norm": 1.3944463729858398, "learning_rate": 1.1685584852076746e-05, "loss": 0.9092, "step": 2257 }, { "epoch": 0.46298954275169163, "grad_norm": 1.2616217136383057, "learning_rate": 1.167903766964856e-05, "loss": 0.9118, "step": 2258 }, { "epoch": 0.46319458683616976, "grad_norm": 1.4083495140075684, "learning_rate": 1.1672489746527979e-05, "loss": 0.9799, "step": 2259 }, { "epoch": 0.46339963092064795, "grad_norm": 1.211759328842163, "learning_rate": 1.1665941085603554e-05, "loss": 0.9887, "step": 2260 }, { "epoch": 0.4636046750051261, "grad_norm": 1.2457689046859741, "learning_rate": 1.1659391689764175e-05, "loss": 0.911, "step": 2261 }, { "epoch": 0.4638097190896043, "grad_norm": 1.3068349361419678, "learning_rate": 1.1652841561899042e-05, "loss": 0.9428, "step": 2262 }, { "epoch": 0.4640147631740824, "grad_norm": 1.2601584196090698, "learning_rate": 1.164629070489769e-05, "loss": 1.0037, "step": 2263 }, { "epoch": 0.4642198072585606, "grad_norm": 1.2778295278549194, "learning_rate": 1.1639739121649967e-05, "loss": 0.9447, "step": 2264 }, { "epoch": 0.46442485134303874, "grad_norm": 1.2948169708251953, "learning_rate": 1.163318681504605e-05, "loss": 1.038, "step": 2265 }, { "epoch": 0.4646298954275169, "grad_norm": 1.1730642318725586, "learning_rate": 1.1626633787976423e-05, "loss": 0.9099, "step": 2266 }, { "epoch": 0.46483493951199506, "grad_norm": 1.262499213218689, "learning_rate": 1.1620080043331901e-05, "loss": 0.9674, "step": 2267 }, { "epoch": 0.46503998359647325, "grad_norm": 1.2737869024276733, "learning_rate": 1.1613525584003611e-05, "loss": 0.9503, "step": 2268 }, { "epoch": 0.4652450276809514, "grad_norm": 1.1316441297531128, "learning_rate": 1.1606970412882987e-05, "loss": 0.8564, "step": 2269 }, { "epoch": 0.4654500717654296, "grad_norm": 1.2715529203414917, "learning_rate": 1.160041453286179e-05, "loss": 0.8537, "step": 2270 }, { "epoch": 0.4656551158499077, "grad_norm": 1.355565071105957, "learning_rate": 1.1593857946832085e-05, "loss": 0.9448, "step": 2271 }, { "epoch": 0.4658601599343859, "grad_norm": 1.2805097103118896, "learning_rate": 1.1587300657686254e-05, "loss": 0.9473, "step": 2272 }, { "epoch": 0.46606520401886403, "grad_norm": 1.4333724975585938, "learning_rate": 1.158074266831698e-05, "loss": 0.9216, "step": 2273 }, { "epoch": 0.4662702481033422, "grad_norm": 1.1741763353347778, "learning_rate": 1.1574183981617267e-05, "loss": 0.9315, "step": 2274 }, { "epoch": 0.46647529218782036, "grad_norm": 1.2519584894180298, "learning_rate": 1.1567624600480418e-05, "loss": 0.9939, "step": 2275 }, { "epoch": 0.46668033627229855, "grad_norm": 1.1887861490249634, "learning_rate": 1.1561064527800046e-05, "loss": 0.848, "step": 2276 }, { "epoch": 0.4668853803567767, "grad_norm": 1.372680902481079, "learning_rate": 1.1554503766470069e-05, "loss": 0.9558, "step": 2277 }, { "epoch": 0.46709042444125487, "grad_norm": 1.221906065940857, "learning_rate": 1.1547942319384708e-05, "loss": 0.9555, "step": 2278 }, { "epoch": 0.46729546852573306, "grad_norm": 1.3452517986297607, "learning_rate": 1.1541380189438483e-05, "loss": 0.921, "step": 2279 }, { "epoch": 0.4675005126102112, "grad_norm": 1.2379419803619385, "learning_rate": 1.1534817379526224e-05, "loss": 0.8959, "step": 2280 }, { "epoch": 0.4677055566946894, "grad_norm": 1.2996991872787476, "learning_rate": 1.1528253892543053e-05, "loss": 1.0316, "step": 2281 }, { "epoch": 0.4679106007791675, "grad_norm": 1.3275333642959595, "learning_rate": 1.1521689731384391e-05, "loss": 0.9186, "step": 2282 }, { "epoch": 0.4681156448636457, "grad_norm": 1.3511923551559448, "learning_rate": 1.1515124898945962e-05, "loss": 0.9813, "step": 2283 }, { "epoch": 0.46832068894812384, "grad_norm": 1.2611078023910522, "learning_rate": 1.1508559398123783e-05, "loss": 0.9565, "step": 2284 }, { "epoch": 0.46852573303260203, "grad_norm": 1.3142013549804688, "learning_rate": 1.1501993231814161e-05, "loss": 0.922, "step": 2285 }, { "epoch": 0.46873077711708017, "grad_norm": 1.2418158054351807, "learning_rate": 1.149542640291371e-05, "loss": 0.9221, "step": 2286 }, { "epoch": 0.46893582120155836, "grad_norm": 1.2041058540344238, "learning_rate": 1.1488858914319321e-05, "loss": 0.8922, "step": 2287 }, { "epoch": 0.4691408652860365, "grad_norm": 1.267458200454712, "learning_rate": 1.1482290768928186e-05, "loss": 0.9489, "step": 2288 }, { "epoch": 0.4693459093705147, "grad_norm": 1.2937408685684204, "learning_rate": 1.1475721969637778e-05, "loss": 0.9324, "step": 2289 }, { "epoch": 0.4695509534549928, "grad_norm": 1.3400555849075317, "learning_rate": 1.1469152519345872e-05, "loss": 0.8537, "step": 2290 }, { "epoch": 0.469755997539471, "grad_norm": 1.2695257663726807, "learning_rate": 1.1462582420950514e-05, "loss": 0.9733, "step": 2291 }, { "epoch": 0.46996104162394914, "grad_norm": 1.1958836317062378, "learning_rate": 1.1456011677350052e-05, "loss": 0.9421, "step": 2292 }, { "epoch": 0.47016608570842733, "grad_norm": 1.2226345539093018, "learning_rate": 1.1449440291443102e-05, "loss": 0.9792, "step": 2293 }, { "epoch": 0.47037112979290546, "grad_norm": 1.29632568359375, "learning_rate": 1.1442868266128578e-05, "loss": 0.939, "step": 2294 }, { "epoch": 0.47057617387738365, "grad_norm": 1.1954978704452515, "learning_rate": 1.1436295604305667e-05, "loss": 0.9737, "step": 2295 }, { "epoch": 0.4707812179618618, "grad_norm": 1.1564425230026245, "learning_rate": 1.1429722308873842e-05, "loss": 0.8477, "step": 2296 }, { "epoch": 0.47098626204634, "grad_norm": 1.3108270168304443, "learning_rate": 1.1423148382732854e-05, "loss": 0.9325, "step": 2297 }, { "epoch": 0.4711913061308181, "grad_norm": 1.3147389888763428, "learning_rate": 1.1416573828782726e-05, "loss": 0.9825, "step": 2298 }, { "epoch": 0.4713963502152963, "grad_norm": 1.1251182556152344, "learning_rate": 1.140999864992377e-05, "loss": 0.8413, "step": 2299 }, { "epoch": 0.47160139429977443, "grad_norm": 1.2277265787124634, "learning_rate": 1.1403422849056562e-05, "loss": 0.982, "step": 2300 }, { "epoch": 0.4718064383842526, "grad_norm": 1.2771401405334473, "learning_rate": 1.1396846429081961e-05, "loss": 0.9512, "step": 2301 }, { "epoch": 0.47201148246873076, "grad_norm": 1.3816829919815063, "learning_rate": 1.1390269392901096e-05, "loss": 0.9681, "step": 2302 }, { "epoch": 0.47221652655320895, "grad_norm": 1.1976858377456665, "learning_rate": 1.1383691743415364e-05, "loss": 0.9426, "step": 2303 }, { "epoch": 0.4724215706376871, "grad_norm": 1.3701140880584717, "learning_rate": 1.1377113483526441e-05, "loss": 1.0078, "step": 2304 }, { "epoch": 0.4726266147221653, "grad_norm": 1.33538019657135, "learning_rate": 1.1370534616136259e-05, "loss": 0.8993, "step": 2305 }, { "epoch": 0.4728316588066434, "grad_norm": 1.2644497156143188, "learning_rate": 1.1363955144147037e-05, "loss": 1.0251, "step": 2306 }, { "epoch": 0.4730367028911216, "grad_norm": 1.2820833921432495, "learning_rate": 1.1357375070461241e-05, "loss": 0.9955, "step": 2307 }, { "epoch": 0.47324174697559973, "grad_norm": 1.2815390825271606, "learning_rate": 1.1350794397981617e-05, "loss": 0.9672, "step": 2308 }, { "epoch": 0.4734467910600779, "grad_norm": 1.2174122333526611, "learning_rate": 1.1344213129611165e-05, "loss": 0.9943, "step": 2309 }, { "epoch": 0.47365183514455605, "grad_norm": 1.2093653678894043, "learning_rate": 1.133763126825316e-05, "loss": 0.9578, "step": 2310 }, { "epoch": 0.47385687922903424, "grad_norm": 1.328372597694397, "learning_rate": 1.1331048816811122e-05, "loss": 0.9432, "step": 2311 }, { "epoch": 0.4740619233135124, "grad_norm": 1.2515137195587158, "learning_rate": 1.1324465778188846e-05, "loss": 0.9931, "step": 2312 }, { "epoch": 0.47426696739799057, "grad_norm": 1.3958075046539307, "learning_rate": 1.131788215529038e-05, "loss": 0.8942, "step": 2313 }, { "epoch": 0.47447201148246876, "grad_norm": 1.229387640953064, "learning_rate": 1.1311297951020028e-05, "loss": 0.9462, "step": 2314 }, { "epoch": 0.4746770555669469, "grad_norm": 1.2051845788955688, "learning_rate": 1.1304713168282356e-05, "loss": 0.9201, "step": 2315 }, { "epoch": 0.4748820996514251, "grad_norm": 1.2823469638824463, "learning_rate": 1.1298127809982176e-05, "loss": 0.8724, "step": 2316 }, { "epoch": 0.4750871437359032, "grad_norm": 1.2716069221496582, "learning_rate": 1.1291541879024568e-05, "loss": 0.925, "step": 2317 }, { "epoch": 0.4752921878203814, "grad_norm": 1.2668980360031128, "learning_rate": 1.1284955378314848e-05, "loss": 0.8777, "step": 2318 }, { "epoch": 0.47549723190485954, "grad_norm": 1.25950026512146, "learning_rate": 1.1278368310758593e-05, "loss": 0.9101, "step": 2319 }, { "epoch": 0.47570227598933773, "grad_norm": 1.261542558670044, "learning_rate": 1.1271780679261638e-05, "loss": 0.8699, "step": 2320 }, { "epoch": 0.47590732007381586, "grad_norm": 1.3017336130142212, "learning_rate": 1.1265192486730043e-05, "loss": 0.966, "step": 2321 }, { "epoch": 0.47611236415829405, "grad_norm": 1.193358063697815, "learning_rate": 1.1258603736070145e-05, "loss": 0.9433, "step": 2322 }, { "epoch": 0.4763174082427722, "grad_norm": 1.2857511043548584, "learning_rate": 1.1252014430188498e-05, "loss": 0.869, "step": 2323 }, { "epoch": 0.4765224523272504, "grad_norm": 1.2381908893585205, "learning_rate": 1.1245424571991928e-05, "loss": 0.9064, "step": 2324 }, { "epoch": 0.4767274964117285, "grad_norm": 1.32990300655365, "learning_rate": 1.123883416438748e-05, "loss": 0.9358, "step": 2325 }, { "epoch": 0.4769325404962067, "grad_norm": 1.3859519958496094, "learning_rate": 1.1232243210282463e-05, "loss": 0.9794, "step": 2326 }, { "epoch": 0.47713758458068484, "grad_norm": 1.3312433958053589, "learning_rate": 1.1225651712584413e-05, "loss": 0.9789, "step": 2327 }, { "epoch": 0.477342628665163, "grad_norm": 1.318833589553833, "learning_rate": 1.1219059674201108e-05, "loss": 0.9282, "step": 2328 }, { "epoch": 0.47754767274964116, "grad_norm": 1.2741336822509766, "learning_rate": 1.1212467098040572e-05, "loss": 0.951, "step": 2329 }, { "epoch": 0.47775271683411935, "grad_norm": 1.2595329284667969, "learning_rate": 1.1205873987011054e-05, "loss": 0.9317, "step": 2330 }, { "epoch": 0.4779577609185975, "grad_norm": 1.2174912691116333, "learning_rate": 1.119928034402105e-05, "loss": 0.8849, "step": 2331 }, { "epoch": 0.4781628050030757, "grad_norm": 1.184758186340332, "learning_rate": 1.1192686171979288e-05, "loss": 0.9088, "step": 2332 }, { "epoch": 0.4783678490875538, "grad_norm": 1.2419131994247437, "learning_rate": 1.1186091473794725e-05, "loss": 0.9239, "step": 2333 }, { "epoch": 0.478572893172032, "grad_norm": 1.2545307874679565, "learning_rate": 1.1179496252376551e-05, "loss": 0.9166, "step": 2334 }, { "epoch": 0.47877793725651013, "grad_norm": 1.2351535558700562, "learning_rate": 1.1172900510634194e-05, "loss": 0.8998, "step": 2335 }, { "epoch": 0.4789829813409883, "grad_norm": 1.3010034561157227, "learning_rate": 1.1166304251477302e-05, "loss": 0.955, "step": 2336 }, { "epoch": 0.47918802542546646, "grad_norm": 1.2497669458389282, "learning_rate": 1.1159707477815756e-05, "loss": 0.9503, "step": 2337 }, { "epoch": 0.47939306950994465, "grad_norm": 1.2605311870574951, "learning_rate": 1.1153110192559667e-05, "loss": 0.9662, "step": 2338 }, { "epoch": 0.4795981135944228, "grad_norm": 1.355643630027771, "learning_rate": 1.1146512398619362e-05, "loss": 1.0, "step": 2339 }, { "epoch": 0.47980315767890097, "grad_norm": 1.0864598751068115, "learning_rate": 1.1139914098905406e-05, "loss": 0.8934, "step": 2340 }, { "epoch": 0.4800082017633791, "grad_norm": 1.331876277923584, "learning_rate": 1.1133315296328574e-05, "loss": 0.9404, "step": 2341 }, { "epoch": 0.4802132458478573, "grad_norm": 1.3696379661560059, "learning_rate": 1.1126715993799875e-05, "loss": 0.9616, "step": 2342 }, { "epoch": 0.48041828993233543, "grad_norm": 1.4046512842178345, "learning_rate": 1.1120116194230524e-05, "loss": 0.9953, "step": 2343 }, { "epoch": 0.4806233340168136, "grad_norm": 1.3819472789764404, "learning_rate": 1.1113515900531966e-05, "loss": 0.9589, "step": 2344 }, { "epoch": 0.48082837810129175, "grad_norm": 1.2735899686813354, "learning_rate": 1.1106915115615867e-05, "loss": 0.9311, "step": 2345 }, { "epoch": 0.48103342218576994, "grad_norm": 1.3133738040924072, "learning_rate": 1.1100313842394093e-05, "loss": 0.9349, "step": 2346 }, { "epoch": 0.4812384662702481, "grad_norm": 1.2976882457733154, "learning_rate": 1.1093712083778748e-05, "loss": 0.9672, "step": 2347 }, { "epoch": 0.48144351035472627, "grad_norm": 1.3592437505722046, "learning_rate": 1.108710984268213e-05, "loss": 0.9457, "step": 2348 }, { "epoch": 0.4816485544392044, "grad_norm": 1.1863330602645874, "learning_rate": 1.1080507122016762e-05, "loss": 0.9773, "step": 2349 }, { "epoch": 0.4818535985236826, "grad_norm": 1.2529540061950684, "learning_rate": 1.1073903924695373e-05, "loss": 0.9137, "step": 2350 }, { "epoch": 0.4820586426081608, "grad_norm": 1.2562220096588135, "learning_rate": 1.106730025363091e-05, "loss": 0.9304, "step": 2351 }, { "epoch": 0.4822636866926389, "grad_norm": 1.1480106115341187, "learning_rate": 1.1060696111736515e-05, "loss": 0.9235, "step": 2352 }, { "epoch": 0.4824687307771171, "grad_norm": 1.2336939573287964, "learning_rate": 1.1054091501925555e-05, "loss": 0.8888, "step": 2353 }, { "epoch": 0.48267377486159524, "grad_norm": 1.1987907886505127, "learning_rate": 1.1047486427111585e-05, "loss": 0.8347, "step": 2354 }, { "epoch": 0.48287881894607343, "grad_norm": 1.3058503866195679, "learning_rate": 1.1040880890208378e-05, "loss": 0.9375, "step": 2355 }, { "epoch": 0.48308386303055156, "grad_norm": 1.4142467975616455, "learning_rate": 1.1034274894129913e-05, "loss": 1.0182, "step": 2356 }, { "epoch": 0.48328890711502975, "grad_norm": 1.2870304584503174, "learning_rate": 1.1027668441790358e-05, "loss": 0.8791, "step": 2357 }, { "epoch": 0.4834939511995079, "grad_norm": 1.2245054244995117, "learning_rate": 1.1021061536104093e-05, "loss": 0.9852, "step": 2358 }, { "epoch": 0.4836989952839861, "grad_norm": 1.1247659921646118, "learning_rate": 1.1014454179985699e-05, "loss": 0.8958, "step": 2359 }, { "epoch": 0.4839040393684642, "grad_norm": 1.4413527250289917, "learning_rate": 1.1007846376349944e-05, "loss": 0.9885, "step": 2360 }, { "epoch": 0.4841090834529424, "grad_norm": 1.3212698698043823, "learning_rate": 1.1001238128111808e-05, "loss": 0.9757, "step": 2361 }, { "epoch": 0.48431412753742054, "grad_norm": 1.2505444288253784, "learning_rate": 1.099462943818646e-05, "loss": 0.9209, "step": 2362 }, { "epoch": 0.4845191716218987, "grad_norm": 1.3299827575683594, "learning_rate": 1.0988020309489258e-05, "loss": 0.9376, "step": 2363 }, { "epoch": 0.48472421570637686, "grad_norm": 1.2163385152816772, "learning_rate": 1.0981410744935763e-05, "loss": 0.844, "step": 2364 }, { "epoch": 0.48492925979085505, "grad_norm": 1.244969129562378, "learning_rate": 1.097480074744173e-05, "loss": 0.972, "step": 2365 }, { "epoch": 0.4851343038753332, "grad_norm": 1.1995515823364258, "learning_rate": 1.0968190319923091e-05, "loss": 0.9176, "step": 2366 }, { "epoch": 0.4853393479598114, "grad_norm": 1.257214069366455, "learning_rate": 1.0961579465295987e-05, "loss": 0.9346, "step": 2367 }, { "epoch": 0.4855443920442895, "grad_norm": 1.2380491495132446, "learning_rate": 1.095496818647673e-05, "loss": 0.9127, "step": 2368 }, { "epoch": 0.4857494361287677, "grad_norm": 1.2592294216156006, "learning_rate": 1.0948356486381829e-05, "loss": 0.9048, "step": 2369 }, { "epoch": 0.48595448021324583, "grad_norm": 1.189805030822754, "learning_rate": 1.0941744367927974e-05, "loss": 0.919, "step": 2370 }, { "epoch": 0.486159524297724, "grad_norm": 1.205006718635559, "learning_rate": 1.0935131834032045e-05, "loss": 0.9736, "step": 2371 }, { "epoch": 0.48636456838220216, "grad_norm": 1.2154924869537354, "learning_rate": 1.0928518887611099e-05, "loss": 0.8931, "step": 2372 }, { "epoch": 0.48656961246668035, "grad_norm": 1.2445884943008423, "learning_rate": 1.0921905531582377e-05, "loss": 0.9238, "step": 2373 }, { "epoch": 0.4867746565511585, "grad_norm": 1.3375270366668701, "learning_rate": 1.091529176886331e-05, "loss": 0.957, "step": 2374 }, { "epoch": 0.48697970063563667, "grad_norm": 1.2483735084533691, "learning_rate": 1.0908677602371492e-05, "loss": 0.9528, "step": 2375 }, { "epoch": 0.4871847447201148, "grad_norm": 1.1472978591918945, "learning_rate": 1.0902063035024701e-05, "loss": 0.904, "step": 2376 }, { "epoch": 0.487389788804593, "grad_norm": 1.2283895015716553, "learning_rate": 1.0895448069740902e-05, "loss": 0.9232, "step": 2377 }, { "epoch": 0.48759483288907113, "grad_norm": 1.2386119365692139, "learning_rate": 1.0888832709438222e-05, "loss": 0.9575, "step": 2378 }, { "epoch": 0.4877998769735493, "grad_norm": 1.2038578987121582, "learning_rate": 1.088221695703497e-05, "loss": 0.8958, "step": 2379 }, { "epoch": 0.48800492105802745, "grad_norm": 1.3277637958526611, "learning_rate": 1.0875600815449624e-05, "loss": 0.9054, "step": 2380 }, { "epoch": 0.48820996514250564, "grad_norm": 1.351200819015503, "learning_rate": 1.0868984287600838e-05, "loss": 0.898, "step": 2381 }, { "epoch": 0.4884150092269838, "grad_norm": 1.2833501100540161, "learning_rate": 1.0862367376407433e-05, "loss": 0.8347, "step": 2382 }, { "epoch": 0.48862005331146197, "grad_norm": 1.4407817125320435, "learning_rate": 1.08557500847884e-05, "loss": 0.9404, "step": 2383 }, { "epoch": 0.4888250973959401, "grad_norm": 1.302977204322815, "learning_rate": 1.0849132415662896e-05, "loss": 0.939, "step": 2384 }, { "epoch": 0.4890301414804183, "grad_norm": 1.1466697454452515, "learning_rate": 1.0842514371950248e-05, "loss": 0.9617, "step": 2385 }, { "epoch": 0.4892351855648965, "grad_norm": 1.3802999258041382, "learning_rate": 1.0835895956569946e-05, "loss": 0.949, "step": 2386 }, { "epoch": 0.4894402296493746, "grad_norm": 1.2387243509292603, "learning_rate": 1.0829277172441648e-05, "loss": 0.9173, "step": 2387 }, { "epoch": 0.4896452737338528, "grad_norm": 1.290225625038147, "learning_rate": 1.0822658022485165e-05, "loss": 0.868, "step": 2388 }, { "epoch": 0.48985031781833094, "grad_norm": 1.2867136001586914, "learning_rate": 1.0816038509620476e-05, "loss": 0.9996, "step": 2389 }, { "epoch": 0.49005536190280913, "grad_norm": 1.2959166765213013, "learning_rate": 1.0809418636767727e-05, "loss": 0.9432, "step": 2390 }, { "epoch": 0.49026040598728726, "grad_norm": 1.3884201049804688, "learning_rate": 1.0802798406847213e-05, "loss": 0.9594, "step": 2391 }, { "epoch": 0.49046545007176545, "grad_norm": 1.2361559867858887, "learning_rate": 1.0796177822779384e-05, "loss": 0.9383, "step": 2392 }, { "epoch": 0.4906704941562436, "grad_norm": 1.2203317880630493, "learning_rate": 1.0789556887484853e-05, "loss": 0.9218, "step": 2393 }, { "epoch": 0.4908755382407218, "grad_norm": 1.3405808210372925, "learning_rate": 1.078293560388439e-05, "loss": 0.8877, "step": 2394 }, { "epoch": 0.4910805823251999, "grad_norm": 1.2116281986236572, "learning_rate": 1.077631397489891e-05, "loss": 0.896, "step": 2395 }, { "epoch": 0.4912856264096781, "grad_norm": 1.2285351753234863, "learning_rate": 1.0769692003449489e-05, "loss": 0.9027, "step": 2396 }, { "epoch": 0.49149067049415623, "grad_norm": 1.310724139213562, "learning_rate": 1.0763069692457346e-05, "loss": 1.0116, "step": 2397 }, { "epoch": 0.4916957145786344, "grad_norm": 1.2704797983169556, "learning_rate": 1.0756447044843858e-05, "loss": 0.9822, "step": 2398 }, { "epoch": 0.49190075866311256, "grad_norm": 1.2684030532836914, "learning_rate": 1.0749824063530548e-05, "loss": 0.9096, "step": 2399 }, { "epoch": 0.49210580274759075, "grad_norm": 1.2159838676452637, "learning_rate": 1.0743200751439078e-05, "loss": 0.9562, "step": 2400 }, { "epoch": 0.4923108468320689, "grad_norm": 1.2131184339523315, "learning_rate": 1.073657711149127e-05, "loss": 0.9709, "step": 2401 }, { "epoch": 0.49251589091654707, "grad_norm": 1.230259895324707, "learning_rate": 1.0729953146609076e-05, "loss": 0.9796, "step": 2402 }, { "epoch": 0.4927209350010252, "grad_norm": 1.2229448556900024, "learning_rate": 1.0723328859714605e-05, "loss": 0.9257, "step": 2403 }, { "epoch": 0.4929259790855034, "grad_norm": 1.2476431131362915, "learning_rate": 1.07167042537301e-05, "loss": 0.964, "step": 2404 }, { "epoch": 0.49313102316998153, "grad_norm": 1.2776856422424316, "learning_rate": 1.0710079331577947e-05, "loss": 0.9384, "step": 2405 }, { "epoch": 0.4933360672544597, "grad_norm": 1.3149300813674927, "learning_rate": 1.0703454096180665e-05, "loss": 0.9063, "step": 2406 }, { "epoch": 0.49354111133893785, "grad_norm": 1.220231533050537, "learning_rate": 1.0696828550460928e-05, "loss": 0.9157, "step": 2407 }, { "epoch": 0.49374615542341604, "grad_norm": 1.213073492050171, "learning_rate": 1.0690202697341528e-05, "loss": 0.9767, "step": 2408 }, { "epoch": 0.4939511995078942, "grad_norm": 1.4144096374511719, "learning_rate": 1.0683576539745401e-05, "loss": 0.9402, "step": 2409 }, { "epoch": 0.49415624359237237, "grad_norm": 1.2439824342727661, "learning_rate": 1.067695008059562e-05, "loss": 0.9347, "step": 2410 }, { "epoch": 0.4943612876768505, "grad_norm": 1.3117263317108154, "learning_rate": 1.0670323322815386e-05, "loss": 0.9076, "step": 2411 }, { "epoch": 0.4945663317613287, "grad_norm": 1.2719899415969849, "learning_rate": 1.0663696269328034e-05, "loss": 1.0017, "step": 2412 }, { "epoch": 0.4947713758458068, "grad_norm": 1.306816577911377, "learning_rate": 1.065706892305703e-05, "loss": 0.9459, "step": 2413 }, { "epoch": 0.494976419930285, "grad_norm": 1.2239879369735718, "learning_rate": 1.0650441286925968e-05, "loss": 0.8586, "step": 2414 }, { "epoch": 0.49518146401476315, "grad_norm": 1.257633090019226, "learning_rate": 1.0643813363858568e-05, "loss": 0.9747, "step": 2415 }, { "epoch": 0.49538650809924134, "grad_norm": 1.4040915966033936, "learning_rate": 1.0637185156778683e-05, "loss": 0.9901, "step": 2416 }, { "epoch": 0.4955915521837195, "grad_norm": 1.1902872323989868, "learning_rate": 1.0630556668610286e-05, "loss": 0.9609, "step": 2417 }, { "epoch": 0.49579659626819766, "grad_norm": 1.346359372138977, "learning_rate": 1.0623927902277468e-05, "loss": 0.9244, "step": 2418 }, { "epoch": 0.4960016403526758, "grad_norm": 1.20093834400177, "learning_rate": 1.0617298860704461e-05, "loss": 0.921, "step": 2419 }, { "epoch": 0.496206684437154, "grad_norm": 1.3351420164108276, "learning_rate": 1.0610669546815602e-05, "loss": 0.9088, "step": 2420 }, { "epoch": 0.4964117285216322, "grad_norm": 1.2298493385314941, "learning_rate": 1.060403996353535e-05, "loss": 0.9422, "step": 2421 }, { "epoch": 0.4966167726061103, "grad_norm": 1.2941133975982666, "learning_rate": 1.059741011378829e-05, "loss": 0.9161, "step": 2422 }, { "epoch": 0.4968218166905885, "grad_norm": 1.268972396850586, "learning_rate": 1.0590780000499123e-05, "loss": 0.978, "step": 2423 }, { "epoch": 0.49702686077506664, "grad_norm": 1.2281603813171387, "learning_rate": 1.0584149626592662e-05, "loss": 0.9737, "step": 2424 }, { "epoch": 0.4972319048595448, "grad_norm": 1.3990176916122437, "learning_rate": 1.0577518994993832e-05, "loss": 0.9055, "step": 2425 }, { "epoch": 0.49743694894402296, "grad_norm": 1.3074995279312134, "learning_rate": 1.0570888108627682e-05, "loss": 0.9666, "step": 2426 }, { "epoch": 0.49764199302850115, "grad_norm": 1.2857521772384644, "learning_rate": 1.0564256970419367e-05, "loss": 0.9447, "step": 2427 }, { "epoch": 0.4978470371129793, "grad_norm": 1.234874963760376, "learning_rate": 1.0557625583294154e-05, "loss": 0.8996, "step": 2428 }, { "epoch": 0.4980520811974575, "grad_norm": 1.2249690294265747, "learning_rate": 1.0550993950177417e-05, "loss": 0.9444, "step": 2429 }, { "epoch": 0.4982571252819356, "grad_norm": 1.3811794519424438, "learning_rate": 1.0544362073994645e-05, "loss": 0.952, "step": 2430 }, { "epoch": 0.4984621693664138, "grad_norm": 1.2547556161880493, "learning_rate": 1.053772995767143e-05, "loss": 0.9318, "step": 2431 }, { "epoch": 0.49866721345089193, "grad_norm": 1.247941255569458, "learning_rate": 1.0531097604133473e-05, "loss": 0.963, "step": 2432 }, { "epoch": 0.4988722575353701, "grad_norm": 1.1878818273544312, "learning_rate": 1.0524465016306572e-05, "loss": 0.8819, "step": 2433 }, { "epoch": 0.49907730161984826, "grad_norm": 1.2437410354614258, "learning_rate": 1.0517832197116632e-05, "loss": 0.9714, "step": 2434 }, { "epoch": 0.49928234570432645, "grad_norm": 1.3229812383651733, "learning_rate": 1.0511199149489673e-05, "loss": 0.8646, "step": 2435 }, { "epoch": 0.4994873897888046, "grad_norm": 1.4246089458465576, "learning_rate": 1.0504565876351794e-05, "loss": 0.9035, "step": 2436 }, { "epoch": 0.49969243387328277, "grad_norm": 1.328335165977478, "learning_rate": 1.0497932380629207e-05, "loss": 0.8933, "step": 2437 }, { "epoch": 0.4998974779577609, "grad_norm": 1.3100916147232056, "learning_rate": 1.049129866524822e-05, "loss": 0.9136, "step": 2438 }, { "epoch": 0.500102522042239, "grad_norm": 1.1684653759002686, "learning_rate": 1.0484664733135237e-05, "loss": 0.9199, "step": 2439 }, { "epoch": 0.5003075661267172, "grad_norm": 1.3159795999526978, "learning_rate": 1.0478030587216757e-05, "loss": 0.9723, "step": 2440 }, { "epoch": 0.5005126102111954, "grad_norm": 1.2599294185638428, "learning_rate": 1.047139623041937e-05, "loss": 0.9031, "step": 2441 }, { "epoch": 0.5007176542956736, "grad_norm": 1.2308599948883057, "learning_rate": 1.0464761665669771e-05, "loss": 0.9104, "step": 2442 }, { "epoch": 0.5009226983801517, "grad_norm": 1.2678866386413574, "learning_rate": 1.045812689589473e-05, "loss": 0.9297, "step": 2443 }, { "epoch": 0.5011277424646299, "grad_norm": 1.2168529033660889, "learning_rate": 1.0451491924021127e-05, "loss": 0.998, "step": 2444 }, { "epoch": 0.5013327865491081, "grad_norm": 1.2809638977050781, "learning_rate": 1.0444856752975909e-05, "loss": 0.9248, "step": 2445 }, { "epoch": 0.5015378306335863, "grad_norm": 1.29377281665802, "learning_rate": 1.0438221385686129e-05, "loss": 0.9163, "step": 2446 }, { "epoch": 0.5017428747180643, "grad_norm": 1.3054053783416748, "learning_rate": 1.0431585825078916e-05, "loss": 0.95, "step": 2447 }, { "epoch": 0.5019479188025425, "grad_norm": 1.3488242626190186, "learning_rate": 1.0424950074081492e-05, "loss": 0.9358, "step": 2448 }, { "epoch": 0.5021529628870207, "grad_norm": 1.2941133975982666, "learning_rate": 1.0418314135621155e-05, "loss": 0.9672, "step": 2449 }, { "epoch": 0.5023580069714989, "grad_norm": 1.278578519821167, "learning_rate": 1.0411678012625292e-05, "loss": 0.9201, "step": 2450 }, { "epoch": 0.502563051055977, "grad_norm": 1.469719409942627, "learning_rate": 1.0405041708021366e-05, "loss": 0.892, "step": 2451 }, { "epoch": 0.5027680951404552, "grad_norm": 1.3437894582748413, "learning_rate": 1.0398405224736927e-05, "loss": 0.8573, "step": 2452 }, { "epoch": 0.5029731392249334, "grad_norm": 1.3195449113845825, "learning_rate": 1.0391768565699602e-05, "loss": 0.9181, "step": 2453 }, { "epoch": 0.5031781833094116, "grad_norm": 1.1644749641418457, "learning_rate": 1.0385131733837088e-05, "loss": 0.8593, "step": 2454 }, { "epoch": 0.5033832273938896, "grad_norm": 1.2594523429870605, "learning_rate": 1.0378494732077167e-05, "loss": 0.9479, "step": 2455 }, { "epoch": 0.5035882714783678, "grad_norm": 1.3602133989334106, "learning_rate": 1.0371857563347694e-05, "loss": 0.9353, "step": 2456 }, { "epoch": 0.503793315562846, "grad_norm": 1.2429604530334473, "learning_rate": 1.0365220230576592e-05, "loss": 0.9878, "step": 2457 }, { "epoch": 0.5039983596473242, "grad_norm": 1.183841347694397, "learning_rate": 1.0358582736691863e-05, "loss": 0.9354, "step": 2458 }, { "epoch": 0.5042034037318024, "grad_norm": 1.3520174026489258, "learning_rate": 1.0351945084621578e-05, "loss": 0.9377, "step": 2459 }, { "epoch": 0.5044084478162805, "grad_norm": 1.2705063819885254, "learning_rate": 1.0345307277293877e-05, "loss": 0.8964, "step": 2460 }, { "epoch": 0.5046134919007587, "grad_norm": 1.3913613557815552, "learning_rate": 1.0338669317636967e-05, "loss": 0.9054, "step": 2461 }, { "epoch": 0.5048185359852368, "grad_norm": 1.343747615814209, "learning_rate": 1.0332031208579133e-05, "loss": 0.9085, "step": 2462 }, { "epoch": 0.505023580069715, "grad_norm": 1.161189317703247, "learning_rate": 1.0325392953048707e-05, "loss": 0.8182, "step": 2463 }, { "epoch": 0.5052286241541931, "grad_norm": 1.35939359664917, "learning_rate": 1.0318754553974102e-05, "loss": 0.8426, "step": 2464 }, { "epoch": 0.5054336682386713, "grad_norm": 1.4114925861358643, "learning_rate": 1.0312116014283785e-05, "loss": 0.9105, "step": 2465 }, { "epoch": 0.5056387123231495, "grad_norm": 1.327795386314392, "learning_rate": 1.0305477336906293e-05, "loss": 0.9727, "step": 2466 }, { "epoch": 0.5058437564076277, "grad_norm": 1.296152949333191, "learning_rate": 1.0298838524770212e-05, "loss": 0.9474, "step": 2467 }, { "epoch": 0.5060488004921058, "grad_norm": 1.2306034564971924, "learning_rate": 1.02921995808042e-05, "loss": 0.936, "step": 2468 }, { "epoch": 0.506253844576584, "grad_norm": 1.236353874206543, "learning_rate": 1.0285560507936962e-05, "loss": 0.9081, "step": 2469 }, { "epoch": 0.5064588886610621, "grad_norm": 1.378982663154602, "learning_rate": 1.0278921309097272e-05, "loss": 0.8716, "step": 2470 }, { "epoch": 0.5066639327455403, "grad_norm": 1.3573251962661743, "learning_rate": 1.027228198721395e-05, "loss": 1.0142, "step": 2471 }, { "epoch": 0.5068689768300184, "grad_norm": 1.3067071437835693, "learning_rate": 1.0265642545215872e-05, "loss": 0.937, "step": 2472 }, { "epoch": 0.5070740209144966, "grad_norm": 1.3382048606872559, "learning_rate": 1.0259002986031972e-05, "loss": 0.9691, "step": 2473 }, { "epoch": 0.5072790649989748, "grad_norm": 1.2207930088043213, "learning_rate": 1.0252363312591225e-05, "loss": 0.9646, "step": 2474 }, { "epoch": 0.507484109083453, "grad_norm": 1.352571964263916, "learning_rate": 1.0245723527822671e-05, "loss": 0.8814, "step": 2475 }, { "epoch": 0.5076891531679311, "grad_norm": 1.2841496467590332, "learning_rate": 1.0239083634655384e-05, "loss": 0.9754, "step": 2476 }, { "epoch": 0.5078941972524093, "grad_norm": 1.225732445716858, "learning_rate": 1.0232443636018502e-05, "loss": 0.9041, "step": 2477 }, { "epoch": 0.5080992413368874, "grad_norm": 1.2913864850997925, "learning_rate": 1.0225803534841192e-05, "loss": 0.9525, "step": 2478 }, { "epoch": 0.5083042854213656, "grad_norm": 1.2007349729537964, "learning_rate": 1.0219163334052682e-05, "loss": 0.9244, "step": 2479 }, { "epoch": 0.5085093295058437, "grad_norm": 1.1634489297866821, "learning_rate": 1.0212523036582234e-05, "loss": 0.8699, "step": 2480 }, { "epoch": 0.5087143735903219, "grad_norm": 1.2169028520584106, "learning_rate": 1.0205882645359153e-05, "loss": 0.9464, "step": 2481 }, { "epoch": 0.5089194176748001, "grad_norm": 1.243175745010376, "learning_rate": 1.0199242163312794e-05, "loss": 0.9888, "step": 2482 }, { "epoch": 0.5091244617592783, "grad_norm": 1.3112635612487793, "learning_rate": 1.0192601593372542e-05, "loss": 0.9404, "step": 2483 }, { "epoch": 0.5093295058437564, "grad_norm": 1.3704006671905518, "learning_rate": 1.0185960938467826e-05, "loss": 0.9636, "step": 2484 }, { "epoch": 0.5095345499282345, "grad_norm": 1.2957366704940796, "learning_rate": 1.0179320201528109e-05, "loss": 0.9237, "step": 2485 }, { "epoch": 0.5097395940127127, "grad_norm": 1.2502808570861816, "learning_rate": 1.0172679385482896e-05, "loss": 0.9861, "step": 2486 }, { "epoch": 0.5099446380971909, "grad_norm": 1.2868233919143677, "learning_rate": 1.0166038493261723e-05, "loss": 1.0126, "step": 2487 }, { "epoch": 0.510149682181669, "grad_norm": 1.2805896997451782, "learning_rate": 1.0159397527794157e-05, "loss": 0.9675, "step": 2488 }, { "epoch": 0.5103547262661472, "grad_norm": 1.4396005868911743, "learning_rate": 1.0152756492009806e-05, "loss": 0.9388, "step": 2489 }, { "epoch": 0.5105597703506254, "grad_norm": 1.2956995964050293, "learning_rate": 1.0146115388838293e-05, "loss": 0.9806, "step": 2490 }, { "epoch": 0.5107648144351036, "grad_norm": 1.2307757139205933, "learning_rate": 1.0139474221209294e-05, "loss": 0.9457, "step": 2491 }, { "epoch": 0.5109698585195818, "grad_norm": 1.3410184383392334, "learning_rate": 1.013283299205249e-05, "loss": 0.9955, "step": 2492 }, { "epoch": 0.5111749026040598, "grad_norm": 1.2726000547409058, "learning_rate": 1.0126191704297606e-05, "loss": 0.8687, "step": 2493 }, { "epoch": 0.511379946688538, "grad_norm": 1.276762843132019, "learning_rate": 1.0119550360874381e-05, "loss": 0.9101, "step": 2494 }, { "epoch": 0.5115849907730162, "grad_norm": 1.2632067203521729, "learning_rate": 1.011290896471259e-05, "loss": 0.9064, "step": 2495 }, { "epoch": 0.5117900348574944, "grad_norm": 1.3751686811447144, "learning_rate": 1.0106267518742021e-05, "loss": 0.858, "step": 2496 }, { "epoch": 0.5119950789419725, "grad_norm": 1.2455960512161255, "learning_rate": 1.0099626025892491e-05, "loss": 0.9084, "step": 2497 }, { "epoch": 0.5122001230264507, "grad_norm": 1.2511013746261597, "learning_rate": 1.0092984489093833e-05, "loss": 0.8923, "step": 2498 }, { "epoch": 0.5124051671109289, "grad_norm": 1.3356717824935913, "learning_rate": 1.0086342911275904e-05, "loss": 0.9211, "step": 2499 }, { "epoch": 0.5126102111954071, "grad_norm": 1.205901861190796, "learning_rate": 1.0079701295368573e-05, "loss": 0.8803, "step": 2500 }, { "epoch": 0.5128152552798851, "grad_norm": 1.240453839302063, "learning_rate": 1.007305964430173e-05, "loss": 0.9525, "step": 2501 }, { "epoch": 0.5130202993643633, "grad_norm": 1.231988787651062, "learning_rate": 1.0066417961005283e-05, "loss": 0.9711, "step": 2502 }, { "epoch": 0.5132253434488415, "grad_norm": 1.2911053895950317, "learning_rate": 1.0059776248409148e-05, "loss": 0.932, "step": 2503 }, { "epoch": 0.5134303875333197, "grad_norm": 1.3627386093139648, "learning_rate": 1.0053134509443257e-05, "loss": 0.9366, "step": 2504 }, { "epoch": 0.5136354316177978, "grad_norm": 1.2292084693908691, "learning_rate": 1.0046492747037554e-05, "loss": 0.9211, "step": 2505 }, { "epoch": 0.513840475702276, "grad_norm": 1.3507120609283447, "learning_rate": 1.0039850964121993e-05, "loss": 0.9995, "step": 2506 }, { "epoch": 0.5140455197867542, "grad_norm": 1.2446120977401733, "learning_rate": 1.0033209163626539e-05, "loss": 0.8649, "step": 2507 }, { "epoch": 0.5142505638712324, "grad_norm": 1.162194013595581, "learning_rate": 1.0026567348481155e-05, "loss": 0.9601, "step": 2508 }, { "epoch": 0.5144556079557104, "grad_norm": 1.315999150276184, "learning_rate": 1.001992552161583e-05, "loss": 0.8929, "step": 2509 }, { "epoch": 0.5146606520401886, "grad_norm": 1.205919623374939, "learning_rate": 1.0013283685960535e-05, "loss": 0.9466, "step": 2510 }, { "epoch": 0.5148656961246668, "grad_norm": 1.2715667486190796, "learning_rate": 1.0006641844445264e-05, "loss": 0.8844, "step": 2511 }, { "epoch": 0.515070740209145, "grad_norm": 1.2861465215682983, "learning_rate": 1e-05, "loss": 1.0359, "step": 2512 }, { "epoch": 0.5152757842936231, "grad_norm": 1.2715524435043335, "learning_rate": 9.99335815555474e-06, "loss": 0.9471, "step": 2513 }, { "epoch": 0.5154808283781013, "grad_norm": 1.408806562423706, "learning_rate": 9.986716314039465e-06, "loss": 0.9358, "step": 2514 }, { "epoch": 0.5156858724625795, "grad_norm": 1.2803142070770264, "learning_rate": 9.980074478384175e-06, "loss": 0.9136, "step": 2515 }, { "epoch": 0.5158909165470577, "grad_norm": 1.19564950466156, "learning_rate": 9.973432651518847e-06, "loss": 0.976, "step": 2516 }, { "epoch": 0.5160959606315357, "grad_norm": 1.274158239364624, "learning_rate": 9.966790836373465e-06, "loss": 0.9941, "step": 2517 }, { "epoch": 0.5163010047160139, "grad_norm": 1.2409098148345947, "learning_rate": 9.960149035878009e-06, "loss": 0.8926, "step": 2518 }, { "epoch": 0.5165060488004921, "grad_norm": 1.1619361639022827, "learning_rate": 9.95350725296245e-06, "loss": 0.9009, "step": 2519 }, { "epoch": 0.5167110928849703, "grad_norm": 1.311531662940979, "learning_rate": 9.946865490556747e-06, "loss": 0.9648, "step": 2520 }, { "epoch": 0.5169161369694484, "grad_norm": 1.3129093647003174, "learning_rate": 9.940223751590857e-06, "loss": 0.9839, "step": 2521 }, { "epoch": 0.5171211810539266, "grad_norm": 1.2633354663848877, "learning_rate": 9.933582038994719e-06, "loss": 0.9922, "step": 2522 }, { "epoch": 0.5173262251384048, "grad_norm": 1.1990973949432373, "learning_rate": 9.92694035569827e-06, "loss": 0.9224, "step": 2523 }, { "epoch": 0.517531269222883, "grad_norm": 1.209861397743225, "learning_rate": 9.920298704631432e-06, "loss": 0.8715, "step": 2524 }, { "epoch": 0.517736313307361, "grad_norm": 1.1996482610702515, "learning_rate": 9.9136570887241e-06, "loss": 0.8937, "step": 2525 }, { "epoch": 0.5179413573918392, "grad_norm": 1.271267056465149, "learning_rate": 9.907015510906168e-06, "loss": 0.9267, "step": 2526 }, { "epoch": 0.5181464014763174, "grad_norm": 1.3994277715682983, "learning_rate": 9.90037397410751e-06, "loss": 0.9004, "step": 2527 }, { "epoch": 0.5183514455607956, "grad_norm": 1.1871463060379028, "learning_rate": 9.893732481257984e-06, "loss": 0.9469, "step": 2528 }, { "epoch": 0.5185564896452738, "grad_norm": 1.1876513957977295, "learning_rate": 9.887091035287414e-06, "loss": 0.9272, "step": 2529 }, { "epoch": 0.5187615337297519, "grad_norm": 1.3419098854064941, "learning_rate": 9.88044963912562e-06, "loss": 0.943, "step": 2530 }, { "epoch": 0.5189665778142301, "grad_norm": 1.3131284713745117, "learning_rate": 9.873808295702397e-06, "loss": 0.966, "step": 2531 }, { "epoch": 0.5191716218987082, "grad_norm": 1.2402759790420532, "learning_rate": 9.867167007947511e-06, "loss": 0.8407, "step": 2532 }, { "epoch": 0.5193766659831864, "grad_norm": 1.2355728149414062, "learning_rate": 9.860525778790711e-06, "loss": 0.9436, "step": 2533 }, { "epoch": 0.5195817100676645, "grad_norm": 1.352238416671753, "learning_rate": 9.853884611161709e-06, "loss": 0.9136, "step": 2534 }, { "epoch": 0.5197867541521427, "grad_norm": 1.171374797821045, "learning_rate": 9.8472435079902e-06, "loss": 0.934, "step": 2535 }, { "epoch": 0.5199917982366209, "grad_norm": 1.210503101348877, "learning_rate": 9.840602472205846e-06, "loss": 1.0027, "step": 2536 }, { "epoch": 0.5201968423210991, "grad_norm": 1.2484318017959595, "learning_rate": 9.833961506738282e-06, "loss": 0.948, "step": 2537 }, { "epoch": 0.5204018864055772, "grad_norm": 1.1301549673080444, "learning_rate": 9.827320614517109e-06, "loss": 0.8979, "step": 2538 }, { "epoch": 0.5206069304900554, "grad_norm": 1.2367044687271118, "learning_rate": 9.820679798471894e-06, "loss": 0.9067, "step": 2539 }, { "epoch": 0.5208119745745335, "grad_norm": 1.4090030193328857, "learning_rate": 9.814039061532176e-06, "loss": 1.0476, "step": 2540 }, { "epoch": 0.5210170186590117, "grad_norm": 1.3244054317474365, "learning_rate": 9.80739840662746e-06, "loss": 0.8538, "step": 2541 }, { "epoch": 0.5212220627434898, "grad_norm": 1.2167320251464844, "learning_rate": 9.80075783668721e-06, "loss": 0.9153, "step": 2542 }, { "epoch": 0.521427106827968, "grad_norm": 1.2909085750579834, "learning_rate": 9.79411735464085e-06, "loss": 0.9685, "step": 2543 }, { "epoch": 0.5216321509124462, "grad_norm": 1.2207692861557007, "learning_rate": 9.78747696341777e-06, "loss": 0.8943, "step": 2544 }, { "epoch": 0.5218371949969244, "grad_norm": 1.367404818534851, "learning_rate": 9.78083666594732e-06, "loss": 0.927, "step": 2545 }, { "epoch": 0.5220422390814025, "grad_norm": 1.2185900211334229, "learning_rate": 9.774196465158812e-06, "loss": 0.9135, "step": 2546 }, { "epoch": 0.5222472831658806, "grad_norm": 1.2608249187469482, "learning_rate": 9.767556363981503e-06, "loss": 0.9441, "step": 2547 }, { "epoch": 0.5224523272503588, "grad_norm": 1.2470569610595703, "learning_rate": 9.760916365344618e-06, "loss": 0.9807, "step": 2548 }, { "epoch": 0.522657371334837, "grad_norm": 1.1994211673736572, "learning_rate": 9.754276472177332e-06, "loss": 0.9193, "step": 2549 }, { "epoch": 0.5228624154193151, "grad_norm": 1.2398746013641357, "learning_rate": 9.747636687408778e-06, "loss": 1.0051, "step": 2550 }, { "epoch": 0.5230674595037933, "grad_norm": 1.2325339317321777, "learning_rate": 9.740997013968033e-06, "loss": 0.859, "step": 2551 }, { "epoch": 0.5232725035882715, "grad_norm": 1.471766471862793, "learning_rate": 9.734357454784131e-06, "loss": 1.0511, "step": 2552 }, { "epoch": 0.5234775476727497, "grad_norm": 1.1704081296920776, "learning_rate": 9.727718012786053e-06, "loss": 0.9495, "step": 2553 }, { "epoch": 0.5236825917572278, "grad_norm": 1.1972490549087524, "learning_rate": 9.721078690902729e-06, "loss": 0.906, "step": 2554 }, { "epoch": 0.523887635841706, "grad_norm": 1.2809455394744873, "learning_rate": 9.71443949206304e-06, "loss": 0.9625, "step": 2555 }, { "epoch": 0.5240926799261841, "grad_norm": 1.145875096321106, "learning_rate": 9.707800419195805e-06, "loss": 0.94, "step": 2556 }, { "epoch": 0.5242977240106623, "grad_norm": 1.3340507745742798, "learning_rate": 9.701161475229791e-06, "loss": 0.9072, "step": 2557 }, { "epoch": 0.5245027680951404, "grad_norm": 1.1426588296890259, "learning_rate": 9.69452266309371e-06, "loss": 0.8367, "step": 2558 }, { "epoch": 0.5247078121796186, "grad_norm": 1.2595794200897217, "learning_rate": 9.687883985716214e-06, "loss": 0.8224, "step": 2559 }, { "epoch": 0.5249128562640968, "grad_norm": 1.2140238285064697, "learning_rate": 9.681245446025903e-06, "loss": 0.9032, "step": 2560 }, { "epoch": 0.525117900348575, "grad_norm": 1.2559438943862915, "learning_rate": 9.674607046951297e-06, "loss": 0.9709, "step": 2561 }, { "epoch": 0.525322944433053, "grad_norm": 1.1447176933288574, "learning_rate": 9.66796879142087e-06, "loss": 0.8643, "step": 2562 }, { "epoch": 0.5255279885175312, "grad_norm": 1.253197431564331, "learning_rate": 9.661330682363033e-06, "loss": 0.9211, "step": 2563 }, { "epoch": 0.5257330326020094, "grad_norm": 1.3177670240402222, "learning_rate": 9.65469272270613e-06, "loss": 0.9348, "step": 2564 }, { "epoch": 0.5259380766864876, "grad_norm": 1.1699107885360718, "learning_rate": 9.648054915378427e-06, "loss": 0.8687, "step": 2565 }, { "epoch": 0.5261431207709658, "grad_norm": 1.1812400817871094, "learning_rate": 9.641417263308142e-06, "loss": 0.9862, "step": 2566 }, { "epoch": 0.5263481648554439, "grad_norm": 1.2786064147949219, "learning_rate": 9.634779769423412e-06, "loss": 0.9421, "step": 2567 }, { "epoch": 0.5265532089399221, "grad_norm": 1.2539150714874268, "learning_rate": 9.62814243665231e-06, "loss": 0.9007, "step": 2568 }, { "epoch": 0.5267582530244003, "grad_norm": 1.3367875814437866, "learning_rate": 9.621505267922836e-06, "loss": 0.855, "step": 2569 }, { "epoch": 0.5269632971088785, "grad_norm": 1.2257949113845825, "learning_rate": 9.614868266162915e-06, "loss": 0.9608, "step": 2570 }, { "epoch": 0.5271683411933565, "grad_norm": 1.2157360315322876, "learning_rate": 9.6082314343004e-06, "loss": 0.8243, "step": 2571 }, { "epoch": 0.5273733852778347, "grad_norm": 1.3515523672103882, "learning_rate": 9.601594775263073e-06, "loss": 0.9451, "step": 2572 }, { "epoch": 0.5275784293623129, "grad_norm": 1.2120684385299683, "learning_rate": 9.594958291978637e-06, "loss": 0.9218, "step": 2573 }, { "epoch": 0.5277834734467911, "grad_norm": 1.3365559577941895, "learning_rate": 9.588321987374714e-06, "loss": 1.0089, "step": 2574 }, { "epoch": 0.5279885175312692, "grad_norm": 1.3528624773025513, "learning_rate": 9.58168586437885e-06, "loss": 0.9073, "step": 2575 }, { "epoch": 0.5281935616157474, "grad_norm": 1.5622947216033936, "learning_rate": 9.575049925918511e-06, "loss": 0.9091, "step": 2576 }, { "epoch": 0.5283986057002256, "grad_norm": 1.2885171175003052, "learning_rate": 9.568414174921085e-06, "loss": 0.8652, "step": 2577 }, { "epoch": 0.5286036497847038, "grad_norm": 1.2833869457244873, "learning_rate": 9.561778614313876e-06, "loss": 0.9795, "step": 2578 }, { "epoch": 0.5288086938691818, "grad_norm": 1.2480567693710327, "learning_rate": 9.555143247024095e-06, "loss": 0.9314, "step": 2579 }, { "epoch": 0.52901373795366, "grad_norm": 1.364344596862793, "learning_rate": 9.548508075978876e-06, "loss": 0.8095, "step": 2580 }, { "epoch": 0.5292187820381382, "grad_norm": 1.2081533670425415, "learning_rate": 9.541873104105267e-06, "loss": 0.9731, "step": 2581 }, { "epoch": 0.5294238261226164, "grad_norm": 1.325257658958435, "learning_rate": 9.535238334330234e-06, "loss": 0.9008, "step": 2582 }, { "epoch": 0.5296288702070945, "grad_norm": 1.1992381811141968, "learning_rate": 9.528603769580633e-06, "loss": 0.8681, "step": 2583 }, { "epoch": 0.5298339142915727, "grad_norm": 1.2786784172058105, "learning_rate": 9.521969412783246e-06, "loss": 0.9959, "step": 2584 }, { "epoch": 0.5300389583760509, "grad_norm": 1.282569169998169, "learning_rate": 9.515335266864766e-06, "loss": 0.8564, "step": 2585 }, { "epoch": 0.530244002460529, "grad_norm": 1.3204591274261475, "learning_rate": 9.508701334751782e-06, "loss": 0.8528, "step": 2586 }, { "epoch": 0.5304490465450071, "grad_norm": 1.3130778074264526, "learning_rate": 9.502067619370794e-06, "loss": 0.9588, "step": 2587 }, { "epoch": 0.5306540906294853, "grad_norm": 1.2454042434692383, "learning_rate": 9.49543412364821e-06, "loss": 0.9342, "step": 2588 }, { "epoch": 0.5308591347139635, "grad_norm": 1.4081666469573975, "learning_rate": 9.48880085051033e-06, "loss": 0.9751, "step": 2589 }, { "epoch": 0.5310641787984417, "grad_norm": 1.2147860527038574, "learning_rate": 9.482167802883366e-06, "loss": 0.9383, "step": 2590 }, { "epoch": 0.5312692228829198, "grad_norm": 1.2595422267913818, "learning_rate": 9.475534983693435e-06, "loss": 0.8764, "step": 2591 }, { "epoch": 0.531474266967398, "grad_norm": 1.2212014198303223, "learning_rate": 9.468902395866532e-06, "loss": 0.9075, "step": 2592 }, { "epoch": 0.5316793110518762, "grad_norm": 1.4385199546813965, "learning_rate": 9.462270042328571e-06, "loss": 0.9434, "step": 2593 }, { "epoch": 0.5318843551363543, "grad_norm": 1.256169080734253, "learning_rate": 9.455637926005357e-06, "loss": 0.9551, "step": 2594 }, { "epoch": 0.5320893992208324, "grad_norm": 1.1774650812149048, "learning_rate": 9.449006049822585e-06, "loss": 0.8984, "step": 2595 }, { "epoch": 0.5322944433053106, "grad_norm": 1.2684400081634521, "learning_rate": 9.442374416705853e-06, "loss": 1.0034, "step": 2596 }, { "epoch": 0.5324994873897888, "grad_norm": 1.156726598739624, "learning_rate": 9.435743029580638e-06, "loss": 0.9633, "step": 2597 }, { "epoch": 0.532704531474267, "grad_norm": 1.204509973526001, "learning_rate": 9.42911189137232e-06, "loss": 0.8853, "step": 2598 }, { "epoch": 0.5329095755587452, "grad_norm": 1.2542448043823242, "learning_rate": 9.422481005006171e-06, "loss": 0.9001, "step": 2599 }, { "epoch": 0.5331146196432233, "grad_norm": 1.2935823202133179, "learning_rate": 9.415850373407342e-06, "loss": 1.0334, "step": 2600 }, { "epoch": 0.5333196637277015, "grad_norm": 1.2287760972976685, "learning_rate": 9.40921999950088e-06, "loss": 0.9439, "step": 2601 }, { "epoch": 0.5335247078121796, "grad_norm": 1.202807903289795, "learning_rate": 9.402589886211711e-06, "loss": 0.8858, "step": 2602 }, { "epoch": 0.5337297518966578, "grad_norm": 1.1775490045547485, "learning_rate": 9.395960036464652e-06, "loss": 0.9836, "step": 2603 }, { "epoch": 0.5339347959811359, "grad_norm": 1.2488547563552856, "learning_rate": 9.3893304531844e-06, "loss": 0.9297, "step": 2604 }, { "epoch": 0.5341398400656141, "grad_norm": 1.3514220714569092, "learning_rate": 9.382701139295542e-06, "loss": 0.9642, "step": 2605 }, { "epoch": 0.5343448841500923, "grad_norm": 1.2892866134643555, "learning_rate": 9.376072097722533e-06, "loss": 0.8587, "step": 2606 }, { "epoch": 0.5345499282345705, "grad_norm": 1.3748157024383545, "learning_rate": 9.369443331389718e-06, "loss": 0.9831, "step": 2607 }, { "epoch": 0.5347549723190486, "grad_norm": 1.2683724164962769, "learning_rate": 9.362814843221319e-06, "loss": 0.9163, "step": 2608 }, { "epoch": 0.5349600164035268, "grad_norm": 1.3470213413238525, "learning_rate": 9.35618663614143e-06, "loss": 0.923, "step": 2609 }, { "epoch": 0.5351650604880049, "grad_norm": 1.2228507995605469, "learning_rate": 9.349558713074036e-06, "loss": 0.8969, "step": 2610 }, { "epoch": 0.5353701045724831, "grad_norm": 1.346680998802185, "learning_rate": 9.342931076942973e-06, "loss": 0.9992, "step": 2611 }, { "epoch": 0.5355751486569612, "grad_norm": 1.3181427717208862, "learning_rate": 9.336303730671968e-06, "loss": 0.9505, "step": 2612 }, { "epoch": 0.5357801927414394, "grad_norm": 1.2649656534194946, "learning_rate": 9.329676677184614e-06, "loss": 0.9312, "step": 2613 }, { "epoch": 0.5359852368259176, "grad_norm": 1.186234951019287, "learning_rate": 9.323049919404385e-06, "loss": 0.9524, "step": 2614 }, { "epoch": 0.5361902809103958, "grad_norm": 1.298177719116211, "learning_rate": 9.316423460254602e-06, "loss": 0.9638, "step": 2615 }, { "epoch": 0.5363953249948739, "grad_norm": 1.2300537824630737, "learning_rate": 9.309797302658474e-06, "loss": 0.9084, "step": 2616 }, { "epoch": 0.536600369079352, "grad_norm": 1.20341157913208, "learning_rate": 9.303171449539074e-06, "loss": 0.9256, "step": 2617 }, { "epoch": 0.5368054131638302, "grad_norm": 1.1933828592300415, "learning_rate": 9.296545903819333e-06, "loss": 0.889, "step": 2618 }, { "epoch": 0.5370104572483084, "grad_norm": 1.2378652095794678, "learning_rate": 9.28992066842206e-06, "loss": 0.9753, "step": 2619 }, { "epoch": 0.5372155013327865, "grad_norm": 1.2604480981826782, "learning_rate": 9.283295746269904e-06, "loss": 0.8706, "step": 2620 }, { "epoch": 0.5374205454172647, "grad_norm": 1.2556684017181396, "learning_rate": 9.276671140285396e-06, "loss": 0.8867, "step": 2621 }, { "epoch": 0.5376255895017429, "grad_norm": 1.2417837381362915, "learning_rate": 9.270046853390924e-06, "loss": 0.9722, "step": 2622 }, { "epoch": 0.5378306335862211, "grad_norm": 1.1771860122680664, "learning_rate": 9.263422888508736e-06, "loss": 0.8836, "step": 2623 }, { "epoch": 0.5380356776706992, "grad_norm": 1.3804850578308105, "learning_rate": 9.256799248560925e-06, "loss": 0.9369, "step": 2624 }, { "epoch": 0.5382407217551773, "grad_norm": 1.288166880607605, "learning_rate": 9.250175936469457e-06, "loss": 0.9504, "step": 2625 }, { "epoch": 0.5384457658396555, "grad_norm": 1.3404273986816406, "learning_rate": 9.243552955156143e-06, "loss": 0.8536, "step": 2626 }, { "epoch": 0.5386508099241337, "grad_norm": 1.291506290435791, "learning_rate": 9.236930307542654e-06, "loss": 0.9601, "step": 2627 }, { "epoch": 0.5388558540086118, "grad_norm": 1.240742564201355, "learning_rate": 9.230307996550517e-06, "loss": 0.9076, "step": 2628 }, { "epoch": 0.53906089809309, "grad_norm": 1.2131812572479248, "learning_rate": 9.223686025101092e-06, "loss": 0.9228, "step": 2629 }, { "epoch": 0.5392659421775682, "grad_norm": 1.2543343305587769, "learning_rate": 9.217064396115612e-06, "loss": 0.9253, "step": 2630 }, { "epoch": 0.5394709862620464, "grad_norm": 1.1774606704711914, "learning_rate": 9.210443112515149e-06, "loss": 0.8534, "step": 2631 }, { "epoch": 0.5396760303465244, "grad_norm": 1.2819443941116333, "learning_rate": 9.203822177220621e-06, "loss": 0.9475, "step": 2632 }, { "epoch": 0.5398810744310026, "grad_norm": 1.2037217617034912, "learning_rate": 9.19720159315279e-06, "loss": 0.933, "step": 2633 }, { "epoch": 0.5400861185154808, "grad_norm": 1.208664059638977, "learning_rate": 9.190581363232274e-06, "loss": 0.9124, "step": 2634 }, { "epoch": 0.540291162599959, "grad_norm": 1.3013203144073486, "learning_rate": 9.183961490379524e-06, "loss": 0.9407, "step": 2635 }, { "epoch": 0.5404962066844372, "grad_norm": 1.230384349822998, "learning_rate": 9.177341977514837e-06, "loss": 0.813, "step": 2636 }, { "epoch": 0.5407012507689153, "grad_norm": 1.2800142765045166, "learning_rate": 9.170722827558357e-06, "loss": 0.9326, "step": 2637 }, { "epoch": 0.5409062948533935, "grad_norm": 1.1232984066009521, "learning_rate": 9.164104043430056e-06, "loss": 0.8713, "step": 2638 }, { "epoch": 0.5411113389378717, "grad_norm": 1.3137127161026, "learning_rate": 9.157485628049756e-06, "loss": 0.9759, "step": 2639 }, { "epoch": 0.5413163830223499, "grad_norm": 1.3073041439056396, "learning_rate": 9.150867584337106e-06, "loss": 0.9358, "step": 2640 }, { "epoch": 0.5415214271068279, "grad_norm": 1.2616472244262695, "learning_rate": 9.144249915211605e-06, "loss": 0.9841, "step": 2641 }, { "epoch": 0.5417264711913061, "grad_norm": 1.4048622846603394, "learning_rate": 9.13763262359257e-06, "loss": 0.9691, "step": 2642 }, { "epoch": 0.5419315152757843, "grad_norm": 1.2434629201889038, "learning_rate": 9.131015712399163e-06, "loss": 0.9488, "step": 2643 }, { "epoch": 0.5421365593602625, "grad_norm": 1.27880859375, "learning_rate": 9.124399184550377e-06, "loss": 0.9527, "step": 2644 }, { "epoch": 0.5423416034447406, "grad_norm": 1.2796504497528076, "learning_rate": 9.117783042965031e-06, "loss": 0.901, "step": 2645 }, { "epoch": 0.5425466475292188, "grad_norm": 1.031179666519165, "learning_rate": 9.111167290561783e-06, "loss": 0.7687, "step": 2646 }, { "epoch": 0.542751691613697, "grad_norm": 1.1657516956329346, "learning_rate": 9.104551930259101e-06, "loss": 0.8486, "step": 2647 }, { "epoch": 0.5429567356981752, "grad_norm": 1.2862021923065186, "learning_rate": 9.097936964975302e-06, "loss": 0.9432, "step": 2648 }, { "epoch": 0.5431617797826532, "grad_norm": 1.3397216796875, "learning_rate": 9.091322397628513e-06, "loss": 0.9108, "step": 2649 }, { "epoch": 0.5433668238671314, "grad_norm": 1.3013522624969482, "learning_rate": 9.084708231136694e-06, "loss": 1.0663, "step": 2650 }, { "epoch": 0.5435718679516096, "grad_norm": 1.1739778518676758, "learning_rate": 9.078094468417625e-06, "loss": 0.8836, "step": 2651 }, { "epoch": 0.5437769120360878, "grad_norm": 1.3249365091323853, "learning_rate": 9.071481112388905e-06, "loss": 0.8941, "step": 2652 }, { "epoch": 0.5439819561205659, "grad_norm": 1.2114590406417847, "learning_rate": 9.064868165967957e-06, "loss": 0.9279, "step": 2653 }, { "epoch": 0.5441870002050441, "grad_norm": 1.2486759424209595, "learning_rate": 9.058255632072028e-06, "loss": 0.9799, "step": 2654 }, { "epoch": 0.5443920442895223, "grad_norm": 1.2168850898742676, "learning_rate": 9.051643513618176e-06, "loss": 0.8669, "step": 2655 }, { "epoch": 0.5445970883740004, "grad_norm": 1.2313363552093506, "learning_rate": 9.045031813523274e-06, "loss": 1.0107, "step": 2656 }, { "epoch": 0.5448021324584785, "grad_norm": 1.2463573217391968, "learning_rate": 9.038420534704015e-06, "loss": 0.9154, "step": 2657 }, { "epoch": 0.5450071765429567, "grad_norm": 1.2902734279632568, "learning_rate": 9.031809680076909e-06, "loss": 0.9426, "step": 2658 }, { "epoch": 0.5452122206274349, "grad_norm": 1.2255957126617432, "learning_rate": 9.025199252558276e-06, "loss": 0.9259, "step": 2659 }, { "epoch": 0.5454172647119131, "grad_norm": 1.2463078498840332, "learning_rate": 9.01858925506424e-06, "loss": 1.0069, "step": 2660 }, { "epoch": 0.5456223087963912, "grad_norm": 1.2137595415115356, "learning_rate": 9.011979690510746e-06, "loss": 0.9408, "step": 2661 }, { "epoch": 0.5458273528808694, "grad_norm": 1.2926956415176392, "learning_rate": 9.005370561813545e-06, "loss": 0.9256, "step": 2662 }, { "epoch": 0.5460323969653476, "grad_norm": 1.2071932554244995, "learning_rate": 8.998761871888195e-06, "loss": 0.9191, "step": 2663 }, { "epoch": 0.5462374410498257, "grad_norm": 1.3560930490493774, "learning_rate": 8.992153623650059e-06, "loss": 0.8921, "step": 2664 }, { "epoch": 0.5464424851343038, "grad_norm": 1.20310378074646, "learning_rate": 8.985545820014304e-06, "loss": 0.9331, "step": 2665 }, { "epoch": 0.546647529218782, "grad_norm": 1.3214046955108643, "learning_rate": 8.978938463895908e-06, "loss": 0.9055, "step": 2666 }, { "epoch": 0.5468525733032602, "grad_norm": 1.2099775075912476, "learning_rate": 8.972331558209644e-06, "loss": 0.9068, "step": 2667 }, { "epoch": 0.5470576173877384, "grad_norm": 1.2700692415237427, "learning_rate": 8.965725105870092e-06, "loss": 0.9185, "step": 2668 }, { "epoch": 0.5472626614722165, "grad_norm": 1.313269019126892, "learning_rate": 8.959119109791624e-06, "loss": 0.933, "step": 2669 }, { "epoch": 0.5474677055566947, "grad_norm": 1.2996975183486938, "learning_rate": 8.952513572888418e-06, "loss": 0.9028, "step": 2670 }, { "epoch": 0.5476727496411729, "grad_norm": 1.3193775415420532, "learning_rate": 8.945908498074449e-06, "loss": 0.9608, "step": 2671 }, { "epoch": 0.547877793725651, "grad_norm": 1.3790132999420166, "learning_rate": 8.939303888263485e-06, "loss": 0.8806, "step": 2672 }, { "epoch": 0.5480828378101292, "grad_norm": 1.2733674049377441, "learning_rate": 8.932699746369096e-06, "loss": 0.9159, "step": 2673 }, { "epoch": 0.5482878818946073, "grad_norm": 1.1900298595428467, "learning_rate": 8.926096075304629e-06, "loss": 0.8203, "step": 2674 }, { "epoch": 0.5484929259790855, "grad_norm": 1.168142557144165, "learning_rate": 8.919492877983241e-06, "loss": 0.9534, "step": 2675 }, { "epoch": 0.5486979700635637, "grad_norm": 1.2475192546844482, "learning_rate": 8.912890157317872e-06, "loss": 0.9149, "step": 2676 }, { "epoch": 0.5489030141480419, "grad_norm": 1.3004024028778076, "learning_rate": 8.906287916221259e-06, "loss": 0.8913, "step": 2677 }, { "epoch": 0.54910805823252, "grad_norm": 1.1809210777282715, "learning_rate": 8.89968615760591e-06, "loss": 0.9904, "step": 2678 }, { "epoch": 0.5493131023169981, "grad_norm": 1.1138166189193726, "learning_rate": 8.893084884384138e-06, "loss": 0.8704, "step": 2679 }, { "epoch": 0.5495181464014763, "grad_norm": 1.3655049800872803, "learning_rate": 8.886484099468036e-06, "loss": 0.8954, "step": 2680 }, { "epoch": 0.5497231904859545, "grad_norm": 1.3600361347198486, "learning_rate": 8.879883805769478e-06, "loss": 0.9103, "step": 2681 }, { "epoch": 0.5499282345704326, "grad_norm": 1.2175672054290771, "learning_rate": 8.873284006200129e-06, "loss": 0.9807, "step": 2682 }, { "epoch": 0.5501332786549108, "grad_norm": 1.380989909172058, "learning_rate": 8.866684703671427e-06, "loss": 0.9715, "step": 2683 }, { "epoch": 0.550338322739389, "grad_norm": 1.2200899124145508, "learning_rate": 8.860085901094595e-06, "loss": 0.956, "step": 2684 }, { "epoch": 0.5505433668238672, "grad_norm": 1.202836275100708, "learning_rate": 8.853487601380637e-06, "loss": 0.9459, "step": 2685 }, { "epoch": 0.5507484109083453, "grad_norm": 1.2248189449310303, "learning_rate": 8.846889807440338e-06, "loss": 0.8419, "step": 2686 }, { "epoch": 0.5509534549928234, "grad_norm": 1.2898409366607666, "learning_rate": 8.840292522184247e-06, "loss": 0.8922, "step": 2687 }, { "epoch": 0.5511584990773016, "grad_norm": 1.351480484008789, "learning_rate": 8.833695748522702e-06, "loss": 0.8966, "step": 2688 }, { "epoch": 0.5513635431617798, "grad_norm": 1.1982314586639404, "learning_rate": 8.827099489365809e-06, "loss": 0.9245, "step": 2689 }, { "epoch": 0.5515685872462579, "grad_norm": 1.3209667205810547, "learning_rate": 8.82050374762345e-06, "loss": 0.9712, "step": 2690 }, { "epoch": 0.5517736313307361, "grad_norm": 1.2526299953460693, "learning_rate": 8.813908526205282e-06, "loss": 0.8876, "step": 2691 }, { "epoch": 0.5519786754152143, "grad_norm": 1.2024112939834595, "learning_rate": 8.807313828020715e-06, "loss": 0.9342, "step": 2692 }, { "epoch": 0.5521837194996925, "grad_norm": 1.2143619060516357, "learning_rate": 8.80071965597895e-06, "loss": 0.9175, "step": 2693 }, { "epoch": 0.5523887635841706, "grad_norm": 1.168683409690857, "learning_rate": 8.794126012988948e-06, "loss": 0.8247, "step": 2694 }, { "epoch": 0.5525938076686487, "grad_norm": 1.169230341911316, "learning_rate": 8.787532901959435e-06, "loss": 0.9266, "step": 2695 }, { "epoch": 0.5527988517531269, "grad_norm": 1.2013623714447021, "learning_rate": 8.780940325798894e-06, "loss": 0.9078, "step": 2696 }, { "epoch": 0.5530038958376051, "grad_norm": 1.46757173538208, "learning_rate": 8.774348287415589e-06, "loss": 0.8935, "step": 2697 }, { "epoch": 0.5532089399220832, "grad_norm": 1.1673400402069092, "learning_rate": 8.76775678971754e-06, "loss": 0.9579, "step": 2698 }, { "epoch": 0.5534139840065614, "grad_norm": 1.2092177867889404, "learning_rate": 8.76116583561252e-06, "loss": 0.862, "step": 2699 }, { "epoch": 0.5536190280910396, "grad_norm": 1.3194494247436523, "learning_rate": 8.754575428008078e-06, "loss": 0.955, "step": 2700 }, { "epoch": 0.5538240721755178, "grad_norm": 1.2659921646118164, "learning_rate": 8.747985569811505e-06, "loss": 0.8461, "step": 2701 }, { "epoch": 0.5540291162599958, "grad_norm": 1.2160357236862183, "learning_rate": 8.74139626392986e-06, "loss": 0.8901, "step": 2702 }, { "epoch": 0.554234160344474, "grad_norm": 1.3145984411239624, "learning_rate": 8.734807513269957e-06, "loss": 1.0302, "step": 2703 }, { "epoch": 0.5544392044289522, "grad_norm": 1.1818634271621704, "learning_rate": 8.728219320738369e-06, "loss": 0.9345, "step": 2704 }, { "epoch": 0.5546442485134304, "grad_norm": 1.3266900777816772, "learning_rate": 8.721631689241408e-06, "loss": 0.9156, "step": 2705 }, { "epoch": 0.5548492925979086, "grad_norm": 1.2788118124008179, "learning_rate": 8.715044621685155e-06, "loss": 0.8883, "step": 2706 }, { "epoch": 0.5550543366823867, "grad_norm": 1.246220588684082, "learning_rate": 8.708458120975436e-06, "loss": 0.9624, "step": 2707 }, { "epoch": 0.5552593807668649, "grad_norm": 1.2443253993988037, "learning_rate": 8.701872190017824e-06, "loss": 0.857, "step": 2708 }, { "epoch": 0.5554644248513431, "grad_norm": 1.3377970457077026, "learning_rate": 8.69528683171765e-06, "loss": 0.9363, "step": 2709 }, { "epoch": 0.5556694689358213, "grad_norm": 1.1927706003189087, "learning_rate": 8.688702048979974e-06, "loss": 0.933, "step": 2710 }, { "epoch": 0.5558745130202993, "grad_norm": 1.2837737798690796, "learning_rate": 8.682117844709622e-06, "loss": 0.9749, "step": 2711 }, { "epoch": 0.5560795571047775, "grad_norm": 1.358483910560608, "learning_rate": 8.675534221811156e-06, "loss": 0.9328, "step": 2712 }, { "epoch": 0.5562846011892557, "grad_norm": 1.2604961395263672, "learning_rate": 8.66895118318888e-06, "loss": 0.9479, "step": 2713 }, { "epoch": 0.5564896452737339, "grad_norm": 1.1914023160934448, "learning_rate": 8.662368731746843e-06, "loss": 0.9026, "step": 2714 }, { "epoch": 0.556694689358212, "grad_norm": 1.2375175952911377, "learning_rate": 8.655786870388837e-06, "loss": 0.8917, "step": 2715 }, { "epoch": 0.5568997334426902, "grad_norm": 1.1974695920944214, "learning_rate": 8.649205602018386e-06, "loss": 0.8484, "step": 2716 }, { "epoch": 0.5571047775271684, "grad_norm": 1.2474240064620972, "learning_rate": 8.64262492953876e-06, "loss": 0.8732, "step": 2717 }, { "epoch": 0.5573098216116465, "grad_norm": 1.263930320739746, "learning_rate": 8.636044855852968e-06, "loss": 0.9053, "step": 2718 }, { "epoch": 0.5575148656961246, "grad_norm": 1.3110692501068115, "learning_rate": 8.629465383863743e-06, "loss": 0.9173, "step": 2719 }, { "epoch": 0.5577199097806028, "grad_norm": 1.1069291830062866, "learning_rate": 8.622886516473562e-06, "loss": 0.9112, "step": 2720 }, { "epoch": 0.557924953865081, "grad_norm": 1.2697961330413818, "learning_rate": 8.616308256584636e-06, "loss": 0.9051, "step": 2721 }, { "epoch": 0.5581299979495592, "grad_norm": 1.1686702966690063, "learning_rate": 8.60973060709891e-06, "loss": 0.9429, "step": 2722 }, { "epoch": 0.5583350420340373, "grad_norm": 1.213254690170288, "learning_rate": 8.60315357091804e-06, "loss": 0.8751, "step": 2723 }, { "epoch": 0.5585400861185155, "grad_norm": 1.255384922027588, "learning_rate": 8.59657715094344e-06, "loss": 0.9757, "step": 2724 }, { "epoch": 0.5587451302029937, "grad_norm": 1.2719601392745972, "learning_rate": 8.590001350076232e-06, "loss": 0.905, "step": 2725 }, { "epoch": 0.5589501742874718, "grad_norm": 1.2054506540298462, "learning_rate": 8.583426171217274e-06, "loss": 0.8888, "step": 2726 }, { "epoch": 0.5591552183719499, "grad_norm": 1.5677227973937988, "learning_rate": 8.576851617267151e-06, "loss": 0.7973, "step": 2727 }, { "epoch": 0.5593602624564281, "grad_norm": 1.3307733535766602, "learning_rate": 8.57027769112616e-06, "loss": 0.8944, "step": 2728 }, { "epoch": 0.5595653065409063, "grad_norm": 1.200449824333191, "learning_rate": 8.563704395694335e-06, "loss": 0.9578, "step": 2729 }, { "epoch": 0.5597703506253845, "grad_norm": 1.25844407081604, "learning_rate": 8.557131733871424e-06, "loss": 0.9192, "step": 2730 }, { "epoch": 0.5599753947098626, "grad_norm": 1.296341896057129, "learning_rate": 8.550559708556901e-06, "loss": 0.9122, "step": 2731 }, { "epoch": 0.5601804387943408, "grad_norm": 1.3103001117706299, "learning_rate": 8.543988322649954e-06, "loss": 0.9138, "step": 2732 }, { "epoch": 0.560385482878819, "grad_norm": 1.1208125352859497, "learning_rate": 8.537417579049489e-06, "loss": 0.8384, "step": 2733 }, { "epoch": 0.5605905269632971, "grad_norm": 1.2221094369888306, "learning_rate": 8.530847480654131e-06, "loss": 0.8589, "step": 2734 }, { "epoch": 0.5607955710477752, "grad_norm": 1.3458689451217651, "learning_rate": 8.524278030362223e-06, "loss": 0.9112, "step": 2735 }, { "epoch": 0.5610006151322534, "grad_norm": 1.3728989362716675, "learning_rate": 8.51770923107182e-06, "loss": 0.9189, "step": 2736 }, { "epoch": 0.5612056592167316, "grad_norm": 1.246221899986267, "learning_rate": 8.511141085680684e-06, "loss": 0.9226, "step": 2737 }, { "epoch": 0.5614107033012098, "grad_norm": 1.2073570489883423, "learning_rate": 8.504573597086292e-06, "loss": 0.9945, "step": 2738 }, { "epoch": 0.5616157473856879, "grad_norm": 1.208510160446167, "learning_rate": 8.498006768185839e-06, "loss": 0.9327, "step": 2739 }, { "epoch": 0.5618207914701661, "grad_norm": 1.2832471132278442, "learning_rate": 8.491440601876222e-06, "loss": 0.9173, "step": 2740 }, { "epoch": 0.5620258355546442, "grad_norm": 1.240655541419983, "learning_rate": 8.484875101054042e-06, "loss": 0.9351, "step": 2741 }, { "epoch": 0.5622308796391224, "grad_norm": 1.2403103113174438, "learning_rate": 8.478310268615612e-06, "loss": 0.9548, "step": 2742 }, { "epoch": 0.5624359237236006, "grad_norm": 1.2685296535491943, "learning_rate": 8.47174610745695e-06, "loss": 0.8982, "step": 2743 }, { "epoch": 0.5626409678080787, "grad_norm": 1.1501327753067017, "learning_rate": 8.46518262047378e-06, "loss": 0.9181, "step": 2744 }, { "epoch": 0.5628460118925569, "grad_norm": 1.2900278568267822, "learning_rate": 8.45861981056152e-06, "loss": 0.9353, "step": 2745 }, { "epoch": 0.5630510559770351, "grad_norm": 1.2516067028045654, "learning_rate": 8.452057680615295e-06, "loss": 0.9148, "step": 2746 }, { "epoch": 0.5632561000615133, "grad_norm": 1.1882801055908203, "learning_rate": 8.445496233529934e-06, "loss": 0.9242, "step": 2747 }, { "epoch": 0.5634611441459914, "grad_norm": 1.1981087923049927, "learning_rate": 8.438935472199955e-06, "loss": 0.8888, "step": 2748 }, { "epoch": 0.5636661882304695, "grad_norm": 1.2311785221099854, "learning_rate": 8.432375399519587e-06, "loss": 0.9652, "step": 2749 }, { "epoch": 0.5638712323149477, "grad_norm": 1.3374249935150146, "learning_rate": 8.425816018382738e-06, "loss": 0.9799, "step": 2750 }, { "epoch": 0.5640762763994259, "grad_norm": 1.2787293195724487, "learning_rate": 8.419257331683023e-06, "loss": 0.9112, "step": 2751 }, { "epoch": 0.564281320483904, "grad_norm": 1.1837459802627563, "learning_rate": 8.41269934231375e-06, "loss": 0.8563, "step": 2752 }, { "epoch": 0.5644863645683822, "grad_norm": 1.168445348739624, "learning_rate": 8.406142053167917e-06, "loss": 0.9939, "step": 2753 }, { "epoch": 0.5646914086528604, "grad_norm": 1.2456485033035278, "learning_rate": 8.399585467138215e-06, "loss": 0.8784, "step": 2754 }, { "epoch": 0.5648964527373386, "grad_norm": 1.3345310688018799, "learning_rate": 8.393029587117017e-06, "loss": 0.9422, "step": 2755 }, { "epoch": 0.5651014968218167, "grad_norm": 1.180196762084961, "learning_rate": 8.386474415996392e-06, "loss": 0.8682, "step": 2756 }, { "epoch": 0.5653065409062948, "grad_norm": 1.2249937057495117, "learning_rate": 8.3799199566681e-06, "loss": 0.865, "step": 2757 }, { "epoch": 0.565511584990773, "grad_norm": 1.2347322702407837, "learning_rate": 8.373366212023582e-06, "loss": 0.9267, "step": 2758 }, { "epoch": 0.5657166290752512, "grad_norm": 1.1771379709243774, "learning_rate": 8.366813184953955e-06, "loss": 0.9621, "step": 2759 }, { "epoch": 0.5659216731597293, "grad_norm": 1.3446813821792603, "learning_rate": 8.360260878350035e-06, "loss": 1.009, "step": 2760 }, { "epoch": 0.5661267172442075, "grad_norm": 1.2609124183654785, "learning_rate": 8.353709295102315e-06, "loss": 0.9219, "step": 2761 }, { "epoch": 0.5663317613286857, "grad_norm": 1.424550175666809, "learning_rate": 8.34715843810096e-06, "loss": 0.8731, "step": 2762 }, { "epoch": 0.5665368054131639, "grad_norm": 1.280990719795227, "learning_rate": 8.340608310235828e-06, "loss": 0.9283, "step": 2763 }, { "epoch": 0.566741849497642, "grad_norm": 1.1588928699493408, "learning_rate": 8.334058914396448e-06, "loss": 0.8851, "step": 2764 }, { "epoch": 0.5669468935821201, "grad_norm": 1.231658697128296, "learning_rate": 8.327510253472023e-06, "loss": 0.9296, "step": 2765 }, { "epoch": 0.5671519376665983, "grad_norm": 1.3010544776916504, "learning_rate": 8.32096233035144e-06, "loss": 0.9527, "step": 2766 }, { "epoch": 0.5673569817510765, "grad_norm": 1.2054173946380615, "learning_rate": 8.314415147923254e-06, "loss": 0.9504, "step": 2767 }, { "epoch": 0.5675620258355546, "grad_norm": 1.2666850090026855, "learning_rate": 8.307868709075703e-06, "loss": 0.9025, "step": 2768 }, { "epoch": 0.5677670699200328, "grad_norm": 1.187322974205017, "learning_rate": 8.30132301669668e-06, "loss": 0.9229, "step": 2769 }, { "epoch": 0.567972114004511, "grad_norm": 1.2855030298233032, "learning_rate": 8.294778073673762e-06, "loss": 0.9741, "step": 2770 }, { "epoch": 0.5681771580889892, "grad_norm": 1.236879825592041, "learning_rate": 8.288233882894193e-06, "loss": 0.8462, "step": 2771 }, { "epoch": 0.5683822021734672, "grad_norm": 1.2457516193389893, "learning_rate": 8.281690447244887e-06, "loss": 0.9043, "step": 2772 }, { "epoch": 0.5685872462579454, "grad_norm": 1.2128303050994873, "learning_rate": 8.275147769612415e-06, "loss": 0.8589, "step": 2773 }, { "epoch": 0.5687922903424236, "grad_norm": 1.1875927448272705, "learning_rate": 8.268605852883024e-06, "loss": 0.825, "step": 2774 }, { "epoch": 0.5689973344269018, "grad_norm": 1.3792457580566406, "learning_rate": 8.26206469994262e-06, "loss": 0.8963, "step": 2775 }, { "epoch": 0.5692023785113799, "grad_norm": 1.2549617290496826, "learning_rate": 8.25552431367678e-06, "loss": 0.8984, "step": 2776 }, { "epoch": 0.5694074225958581, "grad_norm": 1.196584939956665, "learning_rate": 8.248984696970732e-06, "loss": 0.8415, "step": 2777 }, { "epoch": 0.5696124666803363, "grad_norm": 1.2183973789215088, "learning_rate": 8.242445852709368e-06, "loss": 0.925, "step": 2778 }, { "epoch": 0.5698175107648145, "grad_norm": 1.2880336046218872, "learning_rate": 8.235907783777241e-06, "loss": 0.89, "step": 2779 }, { "epoch": 0.5700225548492927, "grad_norm": 1.2591803073883057, "learning_rate": 8.229370493058562e-06, "loss": 0.9094, "step": 2780 }, { "epoch": 0.5702275989337707, "grad_norm": 1.3909491300582886, "learning_rate": 8.222833983437202e-06, "loss": 0.9744, "step": 2781 }, { "epoch": 0.5704326430182489, "grad_norm": 1.294554591178894, "learning_rate": 8.216298257796677e-06, "loss": 0.9203, "step": 2782 }, { "epoch": 0.5706376871027271, "grad_norm": 1.2701252698898315, "learning_rate": 8.209763319020168e-06, "loss": 0.9273, "step": 2783 }, { "epoch": 0.5708427311872053, "grad_norm": 1.1478924751281738, "learning_rate": 8.2032291699905e-06, "loss": 0.861, "step": 2784 }, { "epoch": 0.5710477752716834, "grad_norm": 1.489141821861267, "learning_rate": 8.19669581359016e-06, "loss": 0.8796, "step": 2785 }, { "epoch": 0.5712528193561616, "grad_norm": 1.321237325668335, "learning_rate": 8.190163252701282e-06, "loss": 0.8306, "step": 2786 }, { "epoch": 0.5714578634406398, "grad_norm": 1.282218337059021, "learning_rate": 8.183631490205636e-06, "loss": 0.9404, "step": 2787 }, { "epoch": 0.571662907525118, "grad_norm": 1.3597605228424072, "learning_rate": 8.177100528984659e-06, "loss": 0.8609, "step": 2788 }, { "epoch": 0.571867951609596, "grad_norm": 1.2519108057022095, "learning_rate": 8.17057037191942e-06, "loss": 0.9691, "step": 2789 }, { "epoch": 0.5720729956940742, "grad_norm": 1.5198582410812378, "learning_rate": 8.164041021890647e-06, "loss": 0.9248, "step": 2790 }, { "epoch": 0.5722780397785524, "grad_norm": 1.2391916513442993, "learning_rate": 8.157512481778697e-06, "loss": 0.8548, "step": 2791 }, { "epoch": 0.5724830838630306, "grad_norm": 1.3390958309173584, "learning_rate": 8.150984754463578e-06, "loss": 0.93, "step": 2792 }, { "epoch": 0.5726881279475087, "grad_norm": 1.2489216327667236, "learning_rate": 8.144457842824942e-06, "loss": 0.8758, "step": 2793 }, { "epoch": 0.5728931720319869, "grad_norm": 1.2054446935653687, "learning_rate": 8.137931749742072e-06, "loss": 0.9396, "step": 2794 }, { "epoch": 0.573098216116465, "grad_norm": 1.2585221529006958, "learning_rate": 8.131406478093903e-06, "loss": 0.8458, "step": 2795 }, { "epoch": 0.5733032602009432, "grad_norm": 1.1401171684265137, "learning_rate": 8.124882030758993e-06, "loss": 0.8596, "step": 2796 }, { "epoch": 0.5735083042854213, "grad_norm": 1.3061808347702026, "learning_rate": 8.118358410615545e-06, "loss": 0.9779, "step": 2797 }, { "epoch": 0.5737133483698995, "grad_norm": 1.3588240146636963, "learning_rate": 8.111835620541397e-06, "loss": 0.9333, "step": 2798 }, { "epoch": 0.5739183924543777, "grad_norm": 1.3374959230422974, "learning_rate": 8.105313663414023e-06, "loss": 0.9232, "step": 2799 }, { "epoch": 0.5741234365388559, "grad_norm": 1.2078616619110107, "learning_rate": 8.098792542110518e-06, "loss": 0.8479, "step": 2800 }, { "epoch": 0.574328480623334, "grad_norm": 1.2860734462738037, "learning_rate": 8.092272259507617e-06, "loss": 0.9584, "step": 2801 }, { "epoch": 0.5745335247078122, "grad_norm": 1.3286445140838623, "learning_rate": 8.08575281848169e-06, "loss": 0.8854, "step": 2802 }, { "epoch": 0.5747385687922903, "grad_norm": 1.1855944395065308, "learning_rate": 8.079234221908724e-06, "loss": 0.8319, "step": 2803 }, { "epoch": 0.5749436128767685, "grad_norm": 1.27492356300354, "learning_rate": 8.072716472664348e-06, "loss": 0.9378, "step": 2804 }, { "epoch": 0.5751486569612466, "grad_norm": 1.2940993309020996, "learning_rate": 8.066199573623798e-06, "loss": 0.9281, "step": 2805 }, { "epoch": 0.5753537010457248, "grad_norm": 1.2711708545684814, "learning_rate": 8.05968352766195e-06, "loss": 0.8914, "step": 2806 }, { "epoch": 0.575558745130203, "grad_norm": 1.1292085647583008, "learning_rate": 8.0531683376533e-06, "loss": 0.9194, "step": 2807 }, { "epoch": 0.5757637892146812, "grad_norm": 1.1790131330490112, "learning_rate": 8.046654006471968e-06, "loss": 0.9443, "step": 2808 }, { "epoch": 0.5759688332991593, "grad_norm": 1.2197741270065308, "learning_rate": 8.040140536991688e-06, "loss": 0.862, "step": 2809 }, { "epoch": 0.5761738773836375, "grad_norm": 1.3513505458831787, "learning_rate": 8.03362793208582e-06, "loss": 0.9708, "step": 2810 }, { "epoch": 0.5763789214681156, "grad_norm": 1.454791784286499, "learning_rate": 8.02711619462734e-06, "loss": 0.9678, "step": 2811 }, { "epoch": 0.5765839655525938, "grad_norm": 1.418810248374939, "learning_rate": 8.020605327488846e-06, "loss": 0.9466, "step": 2812 }, { "epoch": 0.576789009637072, "grad_norm": 1.2844955921173096, "learning_rate": 8.014095333542548e-06, "loss": 0.8555, "step": 2813 }, { "epoch": 0.5769940537215501, "grad_norm": 1.1076929569244385, "learning_rate": 8.007586215660268e-06, "loss": 0.871, "step": 2814 }, { "epoch": 0.5771990978060283, "grad_norm": 1.1912202835083008, "learning_rate": 8.001077976713447e-06, "loss": 0.8091, "step": 2815 }, { "epoch": 0.5774041418905065, "grad_norm": 1.2735127210617065, "learning_rate": 7.994570619573135e-06, "loss": 0.9145, "step": 2816 }, { "epoch": 0.5776091859749847, "grad_norm": 1.2430734634399414, "learning_rate": 7.988064147110001e-06, "loss": 0.9368, "step": 2817 }, { "epoch": 0.5778142300594628, "grad_norm": 1.2418709993362427, "learning_rate": 7.981558562194305e-06, "loss": 0.9599, "step": 2818 }, { "epoch": 0.5780192741439409, "grad_norm": 1.2059587240219116, "learning_rate": 7.975053867695937e-06, "loss": 0.8076, "step": 2819 }, { "epoch": 0.5782243182284191, "grad_norm": 1.2181137800216675, "learning_rate": 7.96855006648438e-06, "loss": 0.769, "step": 2820 }, { "epoch": 0.5784293623128973, "grad_norm": 1.1964672803878784, "learning_rate": 7.962047161428728e-06, "loss": 1.0013, "step": 2821 }, { "epoch": 0.5786344063973754, "grad_norm": 1.1390727758407593, "learning_rate": 7.955545155397684e-06, "loss": 0.8651, "step": 2822 }, { "epoch": 0.5788394504818536, "grad_norm": 1.271201252937317, "learning_rate": 7.949044051259542e-06, "loss": 0.9356, "step": 2823 }, { "epoch": 0.5790444945663318, "grad_norm": 1.295044183731079, "learning_rate": 7.94254385188221e-06, "loss": 1.0109, "step": 2824 }, { "epoch": 0.57924953865081, "grad_norm": 1.1898412704467773, "learning_rate": 7.936044560133195e-06, "loss": 0.9322, "step": 2825 }, { "epoch": 0.579454582735288, "grad_norm": 1.30833101272583, "learning_rate": 7.929546178879598e-06, "loss": 0.7753, "step": 2826 }, { "epoch": 0.5796596268197662, "grad_norm": 1.3851484060287476, "learning_rate": 7.923048710988119e-06, "loss": 0.9215, "step": 2827 }, { "epoch": 0.5798646709042444, "grad_norm": 1.1918134689331055, "learning_rate": 7.916552159325063e-06, "loss": 0.9438, "step": 2828 }, { "epoch": 0.5800697149887226, "grad_norm": 1.2791675329208374, "learning_rate": 7.91005652675632e-06, "loss": 0.8993, "step": 2829 }, { "epoch": 0.5802747590732007, "grad_norm": 1.255278468132019, "learning_rate": 7.903561816147383e-06, "loss": 0.9235, "step": 2830 }, { "epoch": 0.5804798031576789, "grad_norm": 1.254215955734253, "learning_rate": 7.897068030363341e-06, "loss": 0.9808, "step": 2831 }, { "epoch": 0.5806848472421571, "grad_norm": 1.2758069038391113, "learning_rate": 7.890575172268858e-06, "loss": 0.9289, "step": 2832 }, { "epoch": 0.5808898913266353, "grad_norm": 1.3041503429412842, "learning_rate": 7.884083244728206e-06, "loss": 0.9562, "step": 2833 }, { "epoch": 0.5810949354111133, "grad_norm": 1.280990481376648, "learning_rate": 7.87759225060524e-06, "loss": 0.9233, "step": 2834 }, { "epoch": 0.5812999794955915, "grad_norm": 1.205830693244934, "learning_rate": 7.87110219276341e-06, "loss": 0.851, "step": 2835 }, { "epoch": 0.5815050235800697, "grad_norm": 1.4128912687301636, "learning_rate": 7.864613074065735e-06, "loss": 0.9106, "step": 2836 }, { "epoch": 0.5817100676645479, "grad_norm": 1.315209150314331, "learning_rate": 7.858124897374837e-06, "loss": 0.9349, "step": 2837 }, { "epoch": 0.581915111749026, "grad_norm": 1.184220552444458, "learning_rate": 7.851637665552919e-06, "loss": 0.8981, "step": 2838 }, { "epoch": 0.5821201558335042, "grad_norm": 1.0988188982009888, "learning_rate": 7.845151381461764e-06, "loss": 0.8585, "step": 2839 }, { "epoch": 0.5823251999179824, "grad_norm": 1.1305588483810425, "learning_rate": 7.838666047962736e-06, "loss": 0.8951, "step": 2840 }, { "epoch": 0.5825302440024606, "grad_norm": 1.2133797407150269, "learning_rate": 7.832181667916783e-06, "loss": 0.9592, "step": 2841 }, { "epoch": 0.5827352880869386, "grad_norm": 1.1872756481170654, "learning_rate": 7.825698244184432e-06, "loss": 0.8711, "step": 2842 }, { "epoch": 0.5829403321714168, "grad_norm": 1.223982810974121, "learning_rate": 7.819215779625785e-06, "loss": 0.9514, "step": 2843 }, { "epoch": 0.583145376255895, "grad_norm": 1.2616641521453857, "learning_rate": 7.812734277100525e-06, "loss": 0.9025, "step": 2844 }, { "epoch": 0.5833504203403732, "grad_norm": 1.1995457410812378, "learning_rate": 7.80625373946791e-06, "loss": 0.8818, "step": 2845 }, { "epoch": 0.5835554644248513, "grad_norm": 1.1662341356277466, "learning_rate": 7.799774169586765e-06, "loss": 0.931, "step": 2846 }, { "epoch": 0.5837605085093295, "grad_norm": 1.172706961631775, "learning_rate": 7.7932955703155e-06, "loss": 0.9138, "step": 2847 }, { "epoch": 0.5839655525938077, "grad_norm": 1.1805572509765625, "learning_rate": 7.786817944512087e-06, "loss": 0.9042, "step": 2848 }, { "epoch": 0.5841705966782859, "grad_norm": 1.2692111730575562, "learning_rate": 7.780341295034079e-06, "loss": 0.923, "step": 2849 }, { "epoch": 0.584375640762764, "grad_norm": 1.279252529144287, "learning_rate": 7.773865624738584e-06, "loss": 0.9919, "step": 2850 }, { "epoch": 0.5845806848472421, "grad_norm": 1.3061044216156006, "learning_rate": 7.767390936482288e-06, "loss": 0.883, "step": 2851 }, { "epoch": 0.5847857289317203, "grad_norm": 1.4332484006881714, "learning_rate": 7.760917233121443e-06, "loss": 0.9028, "step": 2852 }, { "epoch": 0.5849907730161985, "grad_norm": 1.3428163528442383, "learning_rate": 7.754444517511869e-06, "loss": 0.9021, "step": 2853 }, { "epoch": 0.5851958171006767, "grad_norm": 1.2409058809280396, "learning_rate": 7.74797279250894e-06, "loss": 0.9363, "step": 2854 }, { "epoch": 0.5854008611851548, "grad_norm": 1.2458034753799438, "learning_rate": 7.7415020609676e-06, "loss": 0.8585, "step": 2855 }, { "epoch": 0.585605905269633, "grad_norm": 1.2739579677581787, "learning_rate": 7.735032325742355e-06, "loss": 0.8992, "step": 2856 }, { "epoch": 0.5858109493541112, "grad_norm": 1.2239253520965576, "learning_rate": 7.728563589687275e-06, "loss": 0.9553, "step": 2857 }, { "epoch": 0.5860159934385893, "grad_norm": 1.2499977350234985, "learning_rate": 7.722095855655981e-06, "loss": 0.9595, "step": 2858 }, { "epoch": 0.5862210375230674, "grad_norm": 1.3163471221923828, "learning_rate": 7.715629126501656e-06, "loss": 0.9147, "step": 2859 }, { "epoch": 0.5864260816075456, "grad_norm": 1.3344520330429077, "learning_rate": 7.709163405077037e-06, "loss": 0.9715, "step": 2860 }, { "epoch": 0.5866311256920238, "grad_norm": 1.1823662519454956, "learning_rate": 7.702698694234422e-06, "loss": 0.8501, "step": 2861 }, { "epoch": 0.586836169776502, "grad_norm": 1.185598611831665, "learning_rate": 7.696234996825663e-06, "loss": 0.9012, "step": 2862 }, { "epoch": 0.5870412138609801, "grad_norm": 1.2510906457901, "learning_rate": 7.689772315702157e-06, "loss": 0.9258, "step": 2863 }, { "epoch": 0.5872462579454583, "grad_norm": 1.3302456140518188, "learning_rate": 7.683310653714857e-06, "loss": 1.023, "step": 2864 }, { "epoch": 0.5874513020299365, "grad_norm": 1.2869246006011963, "learning_rate": 7.67685001371427e-06, "loss": 0.9115, "step": 2865 }, { "epoch": 0.5876563461144146, "grad_norm": 1.2663384675979614, "learning_rate": 7.670390398550452e-06, "loss": 0.887, "step": 2866 }, { "epoch": 0.5878613901988927, "grad_norm": 1.3112070560455322, "learning_rate": 7.663931811073003e-06, "loss": 0.8879, "step": 2867 }, { "epoch": 0.5880664342833709, "grad_norm": 1.2018793821334839, "learning_rate": 7.657474254131066e-06, "loss": 0.8682, "step": 2868 }, { "epoch": 0.5882714783678491, "grad_norm": 1.2715740203857422, "learning_rate": 7.651017730573339e-06, "loss": 0.8858, "step": 2869 }, { "epoch": 0.5884765224523273, "grad_norm": 1.29631507396698, "learning_rate": 7.644562243248054e-06, "loss": 0.9332, "step": 2870 }, { "epoch": 0.5886815665368054, "grad_norm": 1.3094758987426758, "learning_rate": 7.638107795003003e-06, "loss": 0.9857, "step": 2871 }, { "epoch": 0.5888866106212836, "grad_norm": 1.2735284566879272, "learning_rate": 7.631654388685496e-06, "loss": 0.9751, "step": 2872 }, { "epoch": 0.5890916547057617, "grad_norm": 1.3015142679214478, "learning_rate": 7.625202027142397e-06, "loss": 0.8864, "step": 2873 }, { "epoch": 0.5892966987902399, "grad_norm": 1.1421606540679932, "learning_rate": 7.618750713220115e-06, "loss": 0.8299, "step": 2874 }, { "epoch": 0.589501742874718, "grad_norm": 1.1989432573318481, "learning_rate": 7.61230044976458e-06, "loss": 0.9357, "step": 2875 }, { "epoch": 0.5897067869591962, "grad_norm": 1.3238164186477661, "learning_rate": 7.605851239621276e-06, "loss": 0.9905, "step": 2876 }, { "epoch": 0.5899118310436744, "grad_norm": 1.2691165208816528, "learning_rate": 7.599403085635208e-06, "loss": 0.9228, "step": 2877 }, { "epoch": 0.5901168751281526, "grad_norm": 1.3063615560531616, "learning_rate": 7.5929559906509234e-06, "loss": 0.9404, "step": 2878 }, { "epoch": 0.5903219192126307, "grad_norm": 1.207707405090332, "learning_rate": 7.586509957512499e-06, "loss": 0.8959, "step": 2879 }, { "epoch": 0.5905269632971089, "grad_norm": 1.2895835638046265, "learning_rate": 7.58006498906355e-06, "loss": 0.8743, "step": 2880 }, { "epoch": 0.590732007381587, "grad_norm": 1.1681092977523804, "learning_rate": 7.5736210881472085e-06, "loss": 0.9302, "step": 2881 }, { "epoch": 0.5909370514660652, "grad_norm": 1.2286946773529053, "learning_rate": 7.567178257606147e-06, "loss": 0.8654, "step": 2882 }, { "epoch": 0.5911420955505433, "grad_norm": 1.3843135833740234, "learning_rate": 7.560736500282563e-06, "loss": 0.9509, "step": 2883 }, { "epoch": 0.5913471396350215, "grad_norm": 1.2050753831863403, "learning_rate": 7.554295819018178e-06, "loss": 0.902, "step": 2884 }, { "epoch": 0.5915521837194997, "grad_norm": 1.221651554107666, "learning_rate": 7.547856216654248e-06, "loss": 0.969, "step": 2885 }, { "epoch": 0.5917572278039779, "grad_norm": 1.2512894868850708, "learning_rate": 7.541417696031535e-06, "loss": 0.8848, "step": 2886 }, { "epoch": 0.5919622718884561, "grad_norm": 1.1722991466522217, "learning_rate": 7.534980259990341e-06, "loss": 0.9005, "step": 2887 }, { "epoch": 0.5921673159729341, "grad_norm": 1.2187501192092896, "learning_rate": 7.528543911370483e-06, "loss": 0.9286, "step": 2888 }, { "epoch": 0.5923723600574123, "grad_norm": 1.2457083463668823, "learning_rate": 7.522108653011296e-06, "loss": 0.9237, "step": 2889 }, { "epoch": 0.5925774041418905, "grad_norm": 1.2275089025497437, "learning_rate": 7.515674487751636e-06, "loss": 0.9853, "step": 2890 }, { "epoch": 0.5927824482263687, "grad_norm": 1.3708826303482056, "learning_rate": 7.509241418429882e-06, "loss": 0.9506, "step": 2891 }, { "epoch": 0.5929874923108468, "grad_norm": 1.2096617221832275, "learning_rate": 7.50280944788392e-06, "loss": 0.8471, "step": 2892 }, { "epoch": 0.593192536395325, "grad_norm": 1.226130485534668, "learning_rate": 7.496378578951155e-06, "loss": 0.9059, "step": 2893 }, { "epoch": 0.5933975804798032, "grad_norm": 1.2769927978515625, "learning_rate": 7.489948814468517e-06, "loss": 0.8924, "step": 2894 }, { "epoch": 0.5936026245642814, "grad_norm": 1.2709596157073975, "learning_rate": 7.483520157272427e-06, "loss": 0.9633, "step": 2895 }, { "epoch": 0.5938076686487594, "grad_norm": 1.335538387298584, "learning_rate": 7.477092610198834e-06, "loss": 0.954, "step": 2896 }, { "epoch": 0.5940127127332376, "grad_norm": 1.276915431022644, "learning_rate": 7.470666176083193e-06, "loss": 0.9026, "step": 2897 }, { "epoch": 0.5942177568177158, "grad_norm": 1.3103947639465332, "learning_rate": 7.464240857760472e-06, "loss": 0.8281, "step": 2898 }, { "epoch": 0.594422800902194, "grad_norm": 1.2462360858917236, "learning_rate": 7.4578166580651335e-06, "loss": 0.9406, "step": 2899 }, { "epoch": 0.5946278449866721, "grad_norm": 1.2210688591003418, "learning_rate": 7.45139357983116e-06, "loss": 0.871, "step": 2900 }, { "epoch": 0.5948328890711503, "grad_norm": 1.2345253229141235, "learning_rate": 7.444971625892035e-06, "loss": 0.8588, "step": 2901 }, { "epoch": 0.5950379331556285, "grad_norm": 1.2955323457717896, "learning_rate": 7.438550799080746e-06, "loss": 0.9142, "step": 2902 }, { "epoch": 0.5952429772401067, "grad_norm": 1.136735439300537, "learning_rate": 7.432131102229787e-06, "loss": 0.8791, "step": 2903 }, { "epoch": 0.5954480213245847, "grad_norm": 1.216698408126831, "learning_rate": 7.4257125381711424e-06, "loss": 0.9129, "step": 2904 }, { "epoch": 0.5956530654090629, "grad_norm": 1.3707414865493774, "learning_rate": 7.4192951097363065e-06, "loss": 0.8058, "step": 2905 }, { "epoch": 0.5958581094935411, "grad_norm": 1.309093952178955, "learning_rate": 7.412878819756274e-06, "loss": 0.9279, "step": 2906 }, { "epoch": 0.5960631535780193, "grad_norm": 1.2482690811157227, "learning_rate": 7.40646367106153e-06, "loss": 0.916, "step": 2907 }, { "epoch": 0.5962681976624974, "grad_norm": 1.2702230215072632, "learning_rate": 7.400049666482061e-06, "loss": 0.8979, "step": 2908 }, { "epoch": 0.5964732417469756, "grad_norm": 1.3919755220413208, "learning_rate": 7.393636808847344e-06, "loss": 0.9016, "step": 2909 }, { "epoch": 0.5966782858314538, "grad_norm": 1.2963910102844238, "learning_rate": 7.387225100986358e-06, "loss": 0.9058, "step": 2910 }, { "epoch": 0.596883329915932, "grad_norm": 1.3191051483154297, "learning_rate": 7.3808145457275685e-06, "loss": 0.8952, "step": 2911 }, { "epoch": 0.59708837400041, "grad_norm": 1.2251572608947754, "learning_rate": 7.3744051458989395e-06, "loss": 0.8556, "step": 2912 }, { "epoch": 0.5972934180848882, "grad_norm": 1.2539938688278198, "learning_rate": 7.367996904327911e-06, "loss": 0.9443, "step": 2913 }, { "epoch": 0.5974984621693664, "grad_norm": 1.2770098447799683, "learning_rate": 7.3615898238414264e-06, "loss": 0.9758, "step": 2914 }, { "epoch": 0.5977035062538446, "grad_norm": 1.2646335363388062, "learning_rate": 7.35518390726591e-06, "loss": 0.896, "step": 2915 }, { "epoch": 0.5979085503383227, "grad_norm": 1.3997342586517334, "learning_rate": 7.348779157427282e-06, "loss": 0.8847, "step": 2916 }, { "epoch": 0.5981135944228009, "grad_norm": 1.4226685762405396, "learning_rate": 7.342375577150928e-06, "loss": 0.8839, "step": 2917 }, { "epoch": 0.5983186385072791, "grad_norm": 1.2099021673202515, "learning_rate": 7.335973169261737e-06, "loss": 0.8687, "step": 2918 }, { "epoch": 0.5985236825917573, "grad_norm": 1.224168300628662, "learning_rate": 7.329571936584072e-06, "loss": 0.9459, "step": 2919 }, { "epoch": 0.5987287266762354, "grad_norm": 1.1631883382797241, "learning_rate": 7.323171881941782e-06, "loss": 0.8455, "step": 2920 }, { "epoch": 0.5989337707607135, "grad_norm": 1.3675999641418457, "learning_rate": 7.316773008158193e-06, "loss": 0.9693, "step": 2921 }, { "epoch": 0.5991388148451917, "grad_norm": 1.2918996810913086, "learning_rate": 7.310375318056107e-06, "loss": 0.9586, "step": 2922 }, { "epoch": 0.5993438589296699, "grad_norm": 1.1696044206619263, "learning_rate": 7.303978814457811e-06, "loss": 0.9091, "step": 2923 }, { "epoch": 0.5995489030141481, "grad_norm": 1.2197659015655518, "learning_rate": 7.297583500185064e-06, "loss": 0.8661, "step": 2924 }, { "epoch": 0.5997539470986262, "grad_norm": 1.2171576023101807, "learning_rate": 7.2911893780591025e-06, "loss": 0.9472, "step": 2925 }, { "epoch": 0.5999589911831044, "grad_norm": 1.2004361152648926, "learning_rate": 7.2847964509006355e-06, "loss": 0.9209, "step": 2926 }, { "epoch": 0.6001640352675826, "grad_norm": 1.3160731792449951, "learning_rate": 7.278404721529843e-06, "loss": 0.9209, "step": 2927 }, { "epoch": 0.6003690793520607, "grad_norm": 1.2440998554229736, "learning_rate": 7.272014192766381e-06, "loss": 0.9184, "step": 2928 }, { "epoch": 0.6005741234365388, "grad_norm": 1.3805409669876099, "learning_rate": 7.265624867429375e-06, "loss": 0.9462, "step": 2929 }, { "epoch": 0.600779167521017, "grad_norm": 1.2084861993789673, "learning_rate": 7.259236748337421e-06, "loss": 0.9007, "step": 2930 }, { "epoch": 0.6009842116054952, "grad_norm": 1.1980339288711548, "learning_rate": 7.252849838308573e-06, "loss": 0.8379, "step": 2931 }, { "epoch": 0.6011892556899734, "grad_norm": 1.2484228610992432, "learning_rate": 7.246464140160365e-06, "loss": 0.9111, "step": 2932 }, { "epoch": 0.6013942997744515, "grad_norm": 1.2348201274871826, "learning_rate": 7.240079656709787e-06, "loss": 0.9283, "step": 2933 }, { "epoch": 0.6015993438589297, "grad_norm": 1.2990715503692627, "learning_rate": 7.233696390773303e-06, "loss": 0.9054, "step": 2934 }, { "epoch": 0.6018043879434078, "grad_norm": 1.137951135635376, "learning_rate": 7.227314345166824e-06, "loss": 0.7689, "step": 2935 }, { "epoch": 0.602009432027886, "grad_norm": 1.2146817445755005, "learning_rate": 7.220933522705737e-06, "loss": 0.8607, "step": 2936 }, { "epoch": 0.6022144761123641, "grad_norm": 1.2668322324752808, "learning_rate": 7.214553926204884e-06, "loss": 0.8892, "step": 2937 }, { "epoch": 0.6024195201968423, "grad_norm": 1.2607197761535645, "learning_rate": 7.20817555847857e-06, "loss": 0.9337, "step": 2938 }, { "epoch": 0.6026245642813205, "grad_norm": 1.3455644845962524, "learning_rate": 7.20179842234055e-06, "loss": 0.9558, "step": 2939 }, { "epoch": 0.6028296083657987, "grad_norm": 1.4045742750167847, "learning_rate": 7.195422520604043e-06, "loss": 0.9372, "step": 2940 }, { "epoch": 0.6030346524502768, "grad_norm": 1.1955000162124634, "learning_rate": 7.189047856081719e-06, "loss": 0.9399, "step": 2941 }, { "epoch": 0.603239696534755, "grad_norm": 1.2945246696472168, "learning_rate": 7.182674431585703e-06, "loss": 0.8636, "step": 2942 }, { "epoch": 0.6034447406192331, "grad_norm": 1.3259333372116089, "learning_rate": 7.176302249927578e-06, "loss": 0.9678, "step": 2943 }, { "epoch": 0.6036497847037113, "grad_norm": 1.3126248121261597, "learning_rate": 7.169931313918377e-06, "loss": 0.8942, "step": 2944 }, { "epoch": 0.6038548287881894, "grad_norm": 1.2827205657958984, "learning_rate": 7.163561626368572e-06, "loss": 0.9861, "step": 2945 }, { "epoch": 0.6040598728726676, "grad_norm": 1.2621757984161377, "learning_rate": 7.157193190088097e-06, "loss": 0.8826, "step": 2946 }, { "epoch": 0.6042649169571458, "grad_norm": 1.2373433113098145, "learning_rate": 7.150826007886334e-06, "loss": 0.9493, "step": 2947 }, { "epoch": 0.604469961041624, "grad_norm": 1.1693648099899292, "learning_rate": 7.144460082572109e-06, "loss": 0.8967, "step": 2948 }, { "epoch": 0.6046750051261021, "grad_norm": 1.181069254875183, "learning_rate": 7.138095416953688e-06, "loss": 0.8827, "step": 2949 }, { "epoch": 0.6048800492105803, "grad_norm": 1.1447780132293701, "learning_rate": 7.131732013838786e-06, "loss": 0.9007, "step": 2950 }, { "epoch": 0.6050850932950584, "grad_norm": 1.3018629550933838, "learning_rate": 7.125369876034565e-06, "loss": 0.8561, "step": 2951 }, { "epoch": 0.6052901373795366, "grad_norm": 1.3609236478805542, "learning_rate": 7.119009006347625e-06, "loss": 0.9401, "step": 2952 }, { "epoch": 0.6054951814640147, "grad_norm": 1.360093355178833, "learning_rate": 7.112649407584004e-06, "loss": 0.9363, "step": 2953 }, { "epoch": 0.6057002255484929, "grad_norm": 1.277512550354004, "learning_rate": 7.106291082549183e-06, "loss": 0.924, "step": 2954 }, { "epoch": 0.6059052696329711, "grad_norm": 1.2748281955718994, "learning_rate": 7.099934034048079e-06, "loss": 0.9278, "step": 2955 }, { "epoch": 0.6061103137174493, "grad_norm": 1.2237952947616577, "learning_rate": 7.093578264885048e-06, "loss": 0.8981, "step": 2956 }, { "epoch": 0.6063153578019275, "grad_norm": 1.3820174932479858, "learning_rate": 7.087223777863883e-06, "loss": 0.9103, "step": 2957 }, { "epoch": 0.6065204018864055, "grad_norm": 1.3348098993301392, "learning_rate": 7.080870575787804e-06, "loss": 0.9033, "step": 2958 }, { "epoch": 0.6067254459708837, "grad_norm": 1.2479504346847534, "learning_rate": 7.074518661459471e-06, "loss": 0.9423, "step": 2959 }, { "epoch": 0.6069304900553619, "grad_norm": 1.4165959358215332, "learning_rate": 7.068168037680973e-06, "loss": 0.9323, "step": 2960 }, { "epoch": 0.6071355341398401, "grad_norm": 1.2420133352279663, "learning_rate": 7.061818707253832e-06, "loss": 0.8599, "step": 2961 }, { "epoch": 0.6073405782243182, "grad_norm": 1.2447832822799683, "learning_rate": 7.055470672979003e-06, "loss": 0.9038, "step": 2962 }, { "epoch": 0.6075456223087964, "grad_norm": 1.2668691873550415, "learning_rate": 7.049123937656855e-06, "loss": 0.9507, "step": 2963 }, { "epoch": 0.6077506663932746, "grad_norm": 1.3228052854537964, "learning_rate": 7.0427785040871975e-06, "loss": 0.9925, "step": 2964 }, { "epoch": 0.6079557104777528, "grad_norm": 1.2433578968048096, "learning_rate": 7.0364343750692635e-06, "loss": 0.9241, "step": 2965 }, { "epoch": 0.6081607545622308, "grad_norm": 1.1536989212036133, "learning_rate": 7.03009155340171e-06, "loss": 0.8451, "step": 2966 }, { "epoch": 0.608365798646709, "grad_norm": 1.379577398300171, "learning_rate": 7.023750041882609e-06, "loss": 0.9098, "step": 2967 }, { "epoch": 0.6085708427311872, "grad_norm": 1.354771614074707, "learning_rate": 7.017409843309464e-06, "loss": 0.9126, "step": 2968 }, { "epoch": 0.6087758868156654, "grad_norm": 1.270761489868164, "learning_rate": 7.011070960479201e-06, "loss": 0.8989, "step": 2969 }, { "epoch": 0.6089809309001435, "grad_norm": 1.3324824571609497, "learning_rate": 7.0047333961881544e-06, "loss": 0.9249, "step": 2970 }, { "epoch": 0.6091859749846217, "grad_norm": 1.3544102907180786, "learning_rate": 6.998397153232091e-06, "loss": 0.9432, "step": 2971 }, { "epoch": 0.6093910190690999, "grad_norm": 1.2335268259048462, "learning_rate": 6.992062234406185e-06, "loss": 0.9227, "step": 2972 }, { "epoch": 0.6095960631535781, "grad_norm": 1.2717607021331787, "learning_rate": 6.985728642505025e-06, "loss": 0.9437, "step": 2973 }, { "epoch": 0.6098011072380561, "grad_norm": 1.2763210535049438, "learning_rate": 6.979396380322621e-06, "loss": 0.8449, "step": 2974 }, { "epoch": 0.6100061513225343, "grad_norm": 1.2797784805297852, "learning_rate": 6.973065450652398e-06, "loss": 0.8898, "step": 2975 }, { "epoch": 0.6102111954070125, "grad_norm": 1.1802293062210083, "learning_rate": 6.966735856287181e-06, "loss": 0.8514, "step": 2976 }, { "epoch": 0.6104162394914907, "grad_norm": 1.2459684610366821, "learning_rate": 6.960407600019217e-06, "loss": 0.9643, "step": 2977 }, { "epoch": 0.6106212835759688, "grad_norm": 1.2262698411941528, "learning_rate": 6.954080684640161e-06, "loss": 0.8557, "step": 2978 }, { "epoch": 0.610826327660447, "grad_norm": 1.2751373052597046, "learning_rate": 6.9477551129410735e-06, "loss": 0.8815, "step": 2979 }, { "epoch": 0.6110313717449252, "grad_norm": 1.288993239402771, "learning_rate": 6.9414308877124285e-06, "loss": 0.9038, "step": 2980 }, { "epoch": 0.6112364158294034, "grad_norm": 1.293328046798706, "learning_rate": 6.935108011744092e-06, "loss": 0.8681, "step": 2981 }, { "epoch": 0.6114414599138814, "grad_norm": 1.2034695148468018, "learning_rate": 6.9287864878253475e-06, "loss": 0.9516, "step": 2982 }, { "epoch": 0.6116465039983596, "grad_norm": 1.2314229011535645, "learning_rate": 6.92246631874488e-06, "loss": 0.9043, "step": 2983 }, { "epoch": 0.6118515480828378, "grad_norm": 1.2902494668960571, "learning_rate": 6.9161475072907816e-06, "loss": 0.9538, "step": 2984 }, { "epoch": 0.612056592167316, "grad_norm": 1.3528590202331543, "learning_rate": 6.909830056250527e-06, "loss": 0.9053, "step": 2985 }, { "epoch": 0.6122616362517941, "grad_norm": 1.1438459157943726, "learning_rate": 6.903513968411008e-06, "loss": 0.8781, "step": 2986 }, { "epoch": 0.6124666803362723, "grad_norm": 1.3393993377685547, "learning_rate": 6.897199246558515e-06, "loss": 0.8657, "step": 2987 }, { "epoch": 0.6126717244207505, "grad_norm": 1.3833197355270386, "learning_rate": 6.8908858934787235e-06, "loss": 0.8651, "step": 2988 }, { "epoch": 0.6128767685052287, "grad_norm": 1.3288979530334473, "learning_rate": 6.88457391195672e-06, "loss": 0.969, "step": 2989 }, { "epoch": 0.6130818125897067, "grad_norm": 1.3055708408355713, "learning_rate": 6.878263304776971e-06, "loss": 0.9875, "step": 2990 }, { "epoch": 0.6132868566741849, "grad_norm": 1.1995313167572021, "learning_rate": 6.871954074723346e-06, "loss": 0.8763, "step": 2991 }, { "epoch": 0.6134919007586631, "grad_norm": 1.4050877094268799, "learning_rate": 6.865646224579108e-06, "loss": 0.9268, "step": 2992 }, { "epoch": 0.6136969448431413, "grad_norm": 1.3724660873413086, "learning_rate": 6.859339757126911e-06, "loss": 0.9508, "step": 2993 }, { "epoch": 0.6139019889276195, "grad_norm": 1.2842676639556885, "learning_rate": 6.853034675148789e-06, "loss": 0.966, "step": 2994 }, { "epoch": 0.6141070330120976, "grad_norm": 1.184065580368042, "learning_rate": 6.846730981426175e-06, "loss": 0.8368, "step": 2995 }, { "epoch": 0.6143120770965758, "grad_norm": 1.3213964700698853, "learning_rate": 6.840428678739887e-06, "loss": 0.9808, "step": 2996 }, { "epoch": 0.614517121181054, "grad_norm": 1.3412363529205322, "learning_rate": 6.834127769870134e-06, "loss": 0.9406, "step": 2997 }, { "epoch": 0.6147221652655321, "grad_norm": 1.380632996559143, "learning_rate": 6.827828257596503e-06, "loss": 0.9324, "step": 2998 }, { "epoch": 0.6149272093500102, "grad_norm": 1.3463730812072754, "learning_rate": 6.821530144697961e-06, "loss": 0.9097, "step": 2999 }, { "epoch": 0.6151322534344884, "grad_norm": 1.236157774925232, "learning_rate": 6.815233433952871e-06, "loss": 0.8498, "step": 3000 }, { "epoch": 0.6153372975189666, "grad_norm": 1.166683316230774, "learning_rate": 6.808938128138969e-06, "loss": 0.9294, "step": 3001 }, { "epoch": 0.6155423416034448, "grad_norm": 1.4356027841567993, "learning_rate": 6.802644230033373e-06, "loss": 0.8751, "step": 3002 }, { "epoch": 0.6157473856879229, "grad_norm": 1.2637369632720947, "learning_rate": 6.796351742412576e-06, "loss": 0.9358, "step": 3003 }, { "epoch": 0.6159524297724011, "grad_norm": 1.3349924087524414, "learning_rate": 6.790060668052457e-06, "loss": 0.9111, "step": 3004 }, { "epoch": 0.6161574738568792, "grad_norm": 1.2453463077545166, "learning_rate": 6.783771009728264e-06, "loss": 0.9397, "step": 3005 }, { "epoch": 0.6163625179413574, "grad_norm": 1.2067664861679077, "learning_rate": 6.777482770214623e-06, "loss": 0.8681, "step": 3006 }, { "epoch": 0.6165675620258355, "grad_norm": 1.230567216873169, "learning_rate": 6.771195952285541e-06, "loss": 0.8548, "step": 3007 }, { "epoch": 0.6167726061103137, "grad_norm": 1.2809439897537231, "learning_rate": 6.7649105587143814e-06, "loss": 0.8595, "step": 3008 }, { "epoch": 0.6169776501947919, "grad_norm": 1.247493028640747, "learning_rate": 6.758626592273894e-06, "loss": 0.9244, "step": 3009 }, { "epoch": 0.6171826942792701, "grad_norm": 1.254299283027649, "learning_rate": 6.752344055736195e-06, "loss": 0.8534, "step": 3010 }, { "epoch": 0.6173877383637482, "grad_norm": 1.2253090143203735, "learning_rate": 6.746062951872771e-06, "loss": 0.8407, "step": 3011 }, { "epoch": 0.6175927824482264, "grad_norm": 1.3160852193832397, "learning_rate": 6.739783283454469e-06, "loss": 0.8512, "step": 3012 }, { "epoch": 0.6177978265327045, "grad_norm": 1.2103091478347778, "learning_rate": 6.73350505325151e-06, "loss": 0.8175, "step": 3013 }, { "epoch": 0.6180028706171827, "grad_norm": 1.226422667503357, "learning_rate": 6.72722826403348e-06, "loss": 0.8976, "step": 3014 }, { "epoch": 0.6182079147016608, "grad_norm": 1.3739830255508423, "learning_rate": 6.7209529185693275e-06, "loss": 0.9835, "step": 3015 }, { "epoch": 0.618412958786139, "grad_norm": 1.340942621231079, "learning_rate": 6.714679019627371e-06, "loss": 0.9051, "step": 3016 }, { "epoch": 0.6186180028706172, "grad_norm": 1.311774730682373, "learning_rate": 6.708406569975274e-06, "loss": 0.8783, "step": 3017 }, { "epoch": 0.6188230469550954, "grad_norm": 1.3434189558029175, "learning_rate": 6.702135572380078e-06, "loss": 0.8291, "step": 3018 }, { "epoch": 0.6190280910395735, "grad_norm": 1.2047265768051147, "learning_rate": 6.695866029608178e-06, "loss": 0.8338, "step": 3019 }, { "epoch": 0.6192331351240516, "grad_norm": 1.2142951488494873, "learning_rate": 6.689597944425324e-06, "loss": 0.8726, "step": 3020 }, { "epoch": 0.6194381792085298, "grad_norm": 1.4043828248977661, "learning_rate": 6.683331319596624e-06, "loss": 0.8322, "step": 3021 }, { "epoch": 0.619643223293008, "grad_norm": 1.1247092485427856, "learning_rate": 6.6770661578865444e-06, "loss": 0.9057, "step": 3022 }, { "epoch": 0.6198482673774861, "grad_norm": 1.294207215309143, "learning_rate": 6.670802462058907e-06, "loss": 0.9122, "step": 3023 }, { "epoch": 0.6200533114619643, "grad_norm": 1.2802577018737793, "learning_rate": 6.664540234876884e-06, "loss": 0.8791, "step": 3024 }, { "epoch": 0.6202583555464425, "grad_norm": 1.2060115337371826, "learning_rate": 6.6582794791030035e-06, "loss": 0.9027, "step": 3025 }, { "epoch": 0.6204633996309207, "grad_norm": 1.160004734992981, "learning_rate": 6.652020197499136e-06, "loss": 0.9245, "step": 3026 }, { "epoch": 0.6206684437153989, "grad_norm": 1.1954691410064697, "learning_rate": 6.645762392826509e-06, "loss": 0.9671, "step": 3027 }, { "epoch": 0.6208734877998769, "grad_norm": 1.2686070203781128, "learning_rate": 6.639506067845698e-06, "loss": 0.945, "step": 3028 }, { "epoch": 0.6210785318843551, "grad_norm": 1.210477352142334, "learning_rate": 6.63325122531663e-06, "loss": 0.8513, "step": 3029 }, { "epoch": 0.6212835759688333, "grad_norm": 1.3425830602645874, "learning_rate": 6.626997867998564e-06, "loss": 0.9386, "step": 3030 }, { "epoch": 0.6214886200533115, "grad_norm": 1.2519723176956177, "learning_rate": 6.620745998650113e-06, "loss": 0.961, "step": 3031 }, { "epoch": 0.6216936641377896, "grad_norm": 1.2636990547180176, "learning_rate": 6.614495620029238e-06, "loss": 0.8718, "step": 3032 }, { "epoch": 0.6218987082222678, "grad_norm": 1.2757803201675415, "learning_rate": 6.608246734893237e-06, "loss": 0.8621, "step": 3033 }, { "epoch": 0.622103752306746, "grad_norm": 1.3320963382720947, "learning_rate": 6.601999345998748e-06, "loss": 0.9062, "step": 3034 }, { "epoch": 0.6223087963912242, "grad_norm": 1.2588492631912231, "learning_rate": 6.5957534561017475e-06, "loss": 0.8973, "step": 3035 }, { "epoch": 0.6225138404757022, "grad_norm": 1.2791340351104736, "learning_rate": 6.58950906795756e-06, "loss": 0.8988, "step": 3036 }, { "epoch": 0.6227188845601804, "grad_norm": 1.3210082054138184, "learning_rate": 6.583266184320836e-06, "loss": 1.026, "step": 3037 }, { "epoch": 0.6229239286446586, "grad_norm": 1.2797207832336426, "learning_rate": 6.5770248079455714e-06, "loss": 0.9159, "step": 3038 }, { "epoch": 0.6231289727291368, "grad_norm": 1.362836241722107, "learning_rate": 6.570784941585091e-06, "loss": 0.8293, "step": 3039 }, { "epoch": 0.6233340168136149, "grad_norm": 1.2322076559066772, "learning_rate": 6.564546587992054e-06, "loss": 0.8069, "step": 3040 }, { "epoch": 0.6235390608980931, "grad_norm": 1.1767476797103882, "learning_rate": 6.558309749918458e-06, "loss": 0.9221, "step": 3041 }, { "epoch": 0.6237441049825713, "grad_norm": 1.1435402631759644, "learning_rate": 6.552074430115624e-06, "loss": 0.8477, "step": 3042 }, { "epoch": 0.6239491490670495, "grad_norm": 1.3021409511566162, "learning_rate": 6.545840631334217e-06, "loss": 0.9708, "step": 3043 }, { "epoch": 0.6241541931515275, "grad_norm": 1.1845654249191284, "learning_rate": 6.53960835632421e-06, "loss": 0.868, "step": 3044 }, { "epoch": 0.6243592372360057, "grad_norm": 1.2467278242111206, "learning_rate": 6.5333776078349184e-06, "loss": 0.9231, "step": 3045 }, { "epoch": 0.6245642813204839, "grad_norm": 1.2215632200241089, "learning_rate": 6.5271483886149835e-06, "loss": 0.895, "step": 3046 }, { "epoch": 0.6247693254049621, "grad_norm": 1.3236417770385742, "learning_rate": 6.520920701412371e-06, "loss": 0.9579, "step": 3047 }, { "epoch": 0.6249743694894402, "grad_norm": 1.3300244808197021, "learning_rate": 6.514694548974363e-06, "loss": 0.9735, "step": 3048 }, { "epoch": 0.6251794135739184, "grad_norm": 1.2992075681686401, "learning_rate": 6.508469934047574e-06, "loss": 0.8776, "step": 3049 }, { "epoch": 0.6253844576583966, "grad_norm": 1.118923306465149, "learning_rate": 6.5022468593779385e-06, "loss": 0.8633, "step": 3050 }, { "epoch": 0.6255895017428748, "grad_norm": 1.2710455656051636, "learning_rate": 6.496025327710707e-06, "loss": 0.8844, "step": 3051 }, { "epoch": 0.6257945458273528, "grad_norm": 1.4145773649215698, "learning_rate": 6.489805341790456e-06, "loss": 0.997, "step": 3052 }, { "epoch": 0.625999589911831, "grad_norm": 1.2537202835083008, "learning_rate": 6.483586904361074e-06, "loss": 0.8172, "step": 3053 }, { "epoch": 0.6262046339963092, "grad_norm": 1.2835391759872437, "learning_rate": 6.47737001816577e-06, "loss": 0.9976, "step": 3054 }, { "epoch": 0.6264096780807874, "grad_norm": 1.2366207838058472, "learning_rate": 6.471154685947063e-06, "loss": 0.8639, "step": 3055 }, { "epoch": 0.6266147221652655, "grad_norm": 1.3896383047103882, "learning_rate": 6.464940910446802e-06, "loss": 0.8691, "step": 3056 }, { "epoch": 0.6268197662497437, "grad_norm": 1.3111494779586792, "learning_rate": 6.458728694406124e-06, "loss": 0.9449, "step": 3057 }, { "epoch": 0.6270248103342219, "grad_norm": 1.2353217601776123, "learning_rate": 6.452518040565503e-06, "loss": 0.9266, "step": 3058 }, { "epoch": 0.6272298544187, "grad_norm": 1.1896562576293945, "learning_rate": 6.446308951664708e-06, "loss": 0.9276, "step": 3059 }, { "epoch": 0.6274348985031781, "grad_norm": 1.2795946598052979, "learning_rate": 6.4401014304428245e-06, "loss": 0.8248, "step": 3060 }, { "epoch": 0.6276399425876563, "grad_norm": 1.2213460206985474, "learning_rate": 6.43389547963825e-06, "loss": 0.881, "step": 3061 }, { "epoch": 0.6278449866721345, "grad_norm": 1.1205031871795654, "learning_rate": 6.427691101988673e-06, "loss": 0.8856, "step": 3062 }, { "epoch": 0.6280500307566127, "grad_norm": 1.3377233743667603, "learning_rate": 6.421488300231107e-06, "loss": 0.9051, "step": 3063 }, { "epoch": 0.6282550748410909, "grad_norm": 1.1478989124298096, "learning_rate": 6.41528707710186e-06, "loss": 0.8869, "step": 3064 }, { "epoch": 0.628460118925569, "grad_norm": 1.358068823814392, "learning_rate": 6.409087435336549e-06, "loss": 0.9116, "step": 3065 }, { "epoch": 0.6286651630100472, "grad_norm": 1.2482022047042847, "learning_rate": 6.402889377670083e-06, "loss": 0.9268, "step": 3066 }, { "epoch": 0.6288702070945253, "grad_norm": 1.2419319152832031, "learning_rate": 6.396692906836686e-06, "loss": 0.8988, "step": 3067 }, { "epoch": 0.6290752511790035, "grad_norm": 1.175855040550232, "learning_rate": 6.390498025569874e-06, "loss": 0.8389, "step": 3068 }, { "epoch": 0.6292802952634816, "grad_norm": 1.3280062675476074, "learning_rate": 6.38430473660246e-06, "loss": 0.883, "step": 3069 }, { "epoch": 0.6294853393479598, "grad_norm": 1.2849873304367065, "learning_rate": 6.378113042666566e-06, "loss": 0.8431, "step": 3070 }, { "epoch": 0.629690383432438, "grad_norm": 1.2310351133346558, "learning_rate": 6.3719229464935915e-06, "loss": 0.8411, "step": 3071 }, { "epoch": 0.6298954275169162, "grad_norm": 1.125778079032898, "learning_rate": 6.3657344508142495e-06, "loss": 0.8894, "step": 3072 }, { "epoch": 0.6301004716013943, "grad_norm": 1.269739031791687, "learning_rate": 6.3595475583585344e-06, "loss": 0.972, "step": 3073 }, { "epoch": 0.6303055156858725, "grad_norm": 1.3103028535842896, "learning_rate": 6.3533622718557464e-06, "loss": 0.891, "step": 3074 }, { "epoch": 0.6305105597703506, "grad_norm": 1.3847503662109375, "learning_rate": 6.34717859403446e-06, "loss": 0.8936, "step": 3075 }, { "epoch": 0.6307156038548288, "grad_norm": 1.2310553789138794, "learning_rate": 6.340996527622552e-06, "loss": 0.92, "step": 3076 }, { "epoch": 0.6309206479393069, "grad_norm": 1.2716422080993652, "learning_rate": 6.334816075347185e-06, "loss": 0.9361, "step": 3077 }, { "epoch": 0.6311256920237851, "grad_norm": 1.2946302890777588, "learning_rate": 6.328637239934814e-06, "loss": 0.93, "step": 3078 }, { "epoch": 0.6313307361082633, "grad_norm": 1.2844568490982056, "learning_rate": 6.322460024111176e-06, "loss": 0.9227, "step": 3079 }, { "epoch": 0.6315357801927415, "grad_norm": 1.358607530593872, "learning_rate": 6.31628443060129e-06, "loss": 0.9598, "step": 3080 }, { "epoch": 0.6317408242772196, "grad_norm": 1.1837445497512817, "learning_rate": 6.310110462129465e-06, "loss": 0.8998, "step": 3081 }, { "epoch": 0.6319458683616977, "grad_norm": 1.2061166763305664, "learning_rate": 6.303938121419295e-06, "loss": 0.8046, "step": 3082 }, { "epoch": 0.6321509124461759, "grad_norm": 1.2062346935272217, "learning_rate": 6.297767411193652e-06, "loss": 0.8869, "step": 3083 }, { "epoch": 0.6323559565306541, "grad_norm": 1.398399829864502, "learning_rate": 6.291598334174685e-06, "loss": 0.8558, "step": 3084 }, { "epoch": 0.6325610006151322, "grad_norm": 1.2170286178588867, "learning_rate": 6.28543089308383e-06, "loss": 0.9127, "step": 3085 }, { "epoch": 0.6327660446996104, "grad_norm": 1.2230093479156494, "learning_rate": 6.2792650906418e-06, "loss": 0.8615, "step": 3086 }, { "epoch": 0.6329710887840886, "grad_norm": 1.2613880634307861, "learning_rate": 6.273100929568579e-06, "loss": 0.8552, "step": 3087 }, { "epoch": 0.6331761328685668, "grad_norm": 1.2909828424453735, "learning_rate": 6.266938412583439e-06, "loss": 0.9097, "step": 3088 }, { "epoch": 0.6333811769530449, "grad_norm": 1.2557225227355957, "learning_rate": 6.26077754240491e-06, "loss": 0.9928, "step": 3089 }, { "epoch": 0.633586221037523, "grad_norm": 1.2856817245483398, "learning_rate": 6.2546183217508075e-06, "loss": 0.9478, "step": 3090 }, { "epoch": 0.6337912651220012, "grad_norm": 1.2285281419754028, "learning_rate": 6.248460753338219e-06, "loss": 0.8802, "step": 3091 }, { "epoch": 0.6339963092064794, "grad_norm": 1.2831037044525146, "learning_rate": 6.242304839883502e-06, "loss": 0.9735, "step": 3092 }, { "epoch": 0.6342013532909575, "grad_norm": 1.3164231777191162, "learning_rate": 6.236150584102276e-06, "loss": 0.9457, "step": 3093 }, { "epoch": 0.6344063973754357, "grad_norm": 1.2096885442733765, "learning_rate": 6.22999798870944e-06, "loss": 0.8555, "step": 3094 }, { "epoch": 0.6346114414599139, "grad_norm": 1.3675079345703125, "learning_rate": 6.223847056419154e-06, "loss": 0.8751, "step": 3095 }, { "epoch": 0.6348164855443921, "grad_norm": 1.2996852397918701, "learning_rate": 6.21769778994485e-06, "loss": 0.9395, "step": 3096 }, { "epoch": 0.6350215296288702, "grad_norm": 1.2579585313796997, "learning_rate": 6.211550191999223e-06, "loss": 0.9723, "step": 3097 }, { "epoch": 0.6352265737133483, "grad_norm": 1.3466002941131592, "learning_rate": 6.205404265294223e-06, "loss": 0.9125, "step": 3098 }, { "epoch": 0.6354316177978265, "grad_norm": 1.388780951499939, "learning_rate": 6.199260012541077e-06, "loss": 0.8713, "step": 3099 }, { "epoch": 0.6356366618823047, "grad_norm": 1.2504749298095703, "learning_rate": 6.193117436450264e-06, "loss": 0.9268, "step": 3100 }, { "epoch": 0.6358417059667829, "grad_norm": 1.284639835357666, "learning_rate": 6.186976539731528e-06, "loss": 0.9094, "step": 3101 }, { "epoch": 0.636046750051261, "grad_norm": 1.3468841314315796, "learning_rate": 6.18083732509387e-06, "loss": 0.9587, "step": 3102 }, { "epoch": 0.6362517941357392, "grad_norm": 1.2155990600585938, "learning_rate": 6.174699795245547e-06, "loss": 0.9389, "step": 3103 }, { "epoch": 0.6364568382202174, "grad_norm": 1.2145644426345825, "learning_rate": 6.168563952894077e-06, "loss": 0.9268, "step": 3104 }, { "epoch": 0.6366618823046956, "grad_norm": 1.2678985595703125, "learning_rate": 6.162429800746233e-06, "loss": 0.9732, "step": 3105 }, { "epoch": 0.6368669263891736, "grad_norm": 1.3493661880493164, "learning_rate": 6.156297341508044e-06, "loss": 0.9897, "step": 3106 }, { "epoch": 0.6370719704736518, "grad_norm": 1.1509603261947632, "learning_rate": 6.150166577884781e-06, "loss": 0.8808, "step": 3107 }, { "epoch": 0.63727701455813, "grad_norm": 1.3100138902664185, "learning_rate": 6.1440375125809796e-06, "loss": 0.8964, "step": 3108 }, { "epoch": 0.6374820586426082, "grad_norm": 1.2724894285202026, "learning_rate": 6.137910148300423e-06, "loss": 0.9326, "step": 3109 }, { "epoch": 0.6376871027270863, "grad_norm": 1.2105332612991333, "learning_rate": 6.131784487746142e-06, "loss": 0.9395, "step": 3110 }, { "epoch": 0.6378921468115645, "grad_norm": 1.2816170454025269, "learning_rate": 6.1256605336204215e-06, "loss": 0.9155, "step": 3111 }, { "epoch": 0.6380971908960427, "grad_norm": 1.2567808628082275, "learning_rate": 6.119538288624778e-06, "loss": 0.9207, "step": 3112 }, { "epoch": 0.6383022349805209, "grad_norm": 1.178109884262085, "learning_rate": 6.113417755459994e-06, "loss": 0.9295, "step": 3113 }, { "epoch": 0.6385072790649989, "grad_norm": 1.235737681388855, "learning_rate": 6.107298936826086e-06, "loss": 0.8691, "step": 3114 }, { "epoch": 0.6387123231494771, "grad_norm": 1.191321849822998, "learning_rate": 6.101181835422315e-06, "loss": 0.9892, "step": 3115 }, { "epoch": 0.6389173672339553, "grad_norm": 1.3231713771820068, "learning_rate": 6.095066453947184e-06, "loss": 1.0226, "step": 3116 }, { "epoch": 0.6391224113184335, "grad_norm": 1.250635027885437, "learning_rate": 6.088952795098442e-06, "loss": 0.9153, "step": 3117 }, { "epoch": 0.6393274554029116, "grad_norm": 1.173824429512024, "learning_rate": 6.08284086157307e-06, "loss": 0.9271, "step": 3118 }, { "epoch": 0.6395324994873898, "grad_norm": 1.250526785850525, "learning_rate": 6.076730656067296e-06, "loss": 0.9423, "step": 3119 }, { "epoch": 0.639737543571868, "grad_norm": 1.329487919807434, "learning_rate": 6.070622181276587e-06, "loss": 0.9529, "step": 3120 }, { "epoch": 0.6399425876563462, "grad_norm": 1.2580633163452148, "learning_rate": 6.0645154398956305e-06, "loss": 0.8132, "step": 3121 }, { "epoch": 0.6401476317408242, "grad_norm": 1.4889342784881592, "learning_rate": 6.058410434618367e-06, "loss": 0.8833, "step": 3122 }, { "epoch": 0.6403526758253024, "grad_norm": 1.300847053527832, "learning_rate": 6.052307168137965e-06, "loss": 0.8983, "step": 3123 }, { "epoch": 0.6405577199097806, "grad_norm": 1.2688417434692383, "learning_rate": 6.04620564314683e-06, "loss": 0.9744, "step": 3124 }, { "epoch": 0.6407627639942588, "grad_norm": 1.243818998336792, "learning_rate": 6.040105862336585e-06, "loss": 0.9118, "step": 3125 }, { "epoch": 0.6409678080787369, "grad_norm": 1.2600120306015015, "learning_rate": 6.034007828398099e-06, "loss": 0.9475, "step": 3126 }, { "epoch": 0.6411728521632151, "grad_norm": 1.2785744667053223, "learning_rate": 6.027911544021465e-06, "loss": 0.9477, "step": 3127 }, { "epoch": 0.6413778962476933, "grad_norm": 1.370428204536438, "learning_rate": 6.021817011896004e-06, "loss": 0.8322, "step": 3128 }, { "epoch": 0.6415829403321714, "grad_norm": 1.2313861846923828, "learning_rate": 6.015724234710267e-06, "loss": 0.8957, "step": 3129 }, { "epoch": 0.6417879844166495, "grad_norm": 1.2689582109451294, "learning_rate": 6.009633215152023e-06, "loss": 0.8769, "step": 3130 }, { "epoch": 0.6419930285011277, "grad_norm": 1.2786160707473755, "learning_rate": 6.003543955908273e-06, "loss": 0.928, "step": 3131 }, { "epoch": 0.6421980725856059, "grad_norm": 1.2832231521606445, "learning_rate": 5.997456459665237e-06, "loss": 0.9163, "step": 3132 }, { "epoch": 0.6424031166700841, "grad_norm": 1.3404570817947388, "learning_rate": 5.991370729108363e-06, "loss": 0.9315, "step": 3133 }, { "epoch": 0.6426081607545623, "grad_norm": 1.201655626296997, "learning_rate": 5.985286766922316e-06, "loss": 0.8414, "step": 3134 }, { "epoch": 0.6428132048390404, "grad_norm": 1.409120798110962, "learning_rate": 5.979204575790976e-06, "loss": 0.8813, "step": 3135 }, { "epoch": 0.6430182489235186, "grad_norm": 1.1467537879943848, "learning_rate": 5.973124158397451e-06, "loss": 0.9701, "step": 3136 }, { "epoch": 0.6432232930079967, "grad_norm": 1.2181837558746338, "learning_rate": 5.967045517424062e-06, "loss": 0.8976, "step": 3137 }, { "epoch": 0.6434283370924749, "grad_norm": 1.2172120809555054, "learning_rate": 5.9609686555523515e-06, "loss": 0.9011, "step": 3138 }, { "epoch": 0.643633381176953, "grad_norm": 1.238120436668396, "learning_rate": 5.954893575463064e-06, "loss": 0.9176, "step": 3139 }, { "epoch": 0.6438384252614312, "grad_norm": 1.2715468406677246, "learning_rate": 5.9488202798361715e-06, "loss": 0.7937, "step": 3140 }, { "epoch": 0.6440434693459094, "grad_norm": 1.2064276933670044, "learning_rate": 5.942748771350853e-06, "loss": 0.8917, "step": 3141 }, { "epoch": 0.6442485134303876, "grad_norm": 1.1659373044967651, "learning_rate": 5.936679052685505e-06, "loss": 0.8916, "step": 3142 }, { "epoch": 0.6444535575148657, "grad_norm": 1.2589911222457886, "learning_rate": 5.930611126517719e-06, "loss": 0.9504, "step": 3143 }, { "epoch": 0.6446586015993439, "grad_norm": 1.311951756477356, "learning_rate": 5.924544995524313e-06, "loss": 0.8801, "step": 3144 }, { "epoch": 0.644863645683822, "grad_norm": 1.314327597618103, "learning_rate": 5.918480662381305e-06, "loss": 0.9047, "step": 3145 }, { "epoch": 0.6450686897683002, "grad_norm": 1.1749168634414673, "learning_rate": 5.912418129763922e-06, "loss": 0.9176, "step": 3146 }, { "epoch": 0.6452737338527783, "grad_norm": 1.124241590499878, "learning_rate": 5.906357400346596e-06, "loss": 0.8248, "step": 3147 }, { "epoch": 0.6454787779372565, "grad_norm": 1.2016652822494507, "learning_rate": 5.90029847680296e-06, "loss": 0.879, "step": 3148 }, { "epoch": 0.6456838220217347, "grad_norm": 1.2621798515319824, "learning_rate": 5.894241361805859e-06, "loss": 0.89, "step": 3149 }, { "epoch": 0.6458888661062129, "grad_norm": 1.2146905660629272, "learning_rate": 5.8881860580273285e-06, "loss": 0.9366, "step": 3150 }, { "epoch": 0.646093910190691, "grad_norm": 1.3426638841629028, "learning_rate": 5.882132568138621e-06, "loss": 0.9137, "step": 3151 }, { "epoch": 0.6462989542751691, "grad_norm": 1.295542597770691, "learning_rate": 5.876080894810167e-06, "loss": 0.8741, "step": 3152 }, { "epoch": 0.6465039983596473, "grad_norm": 1.2486062049865723, "learning_rate": 5.870031040711616e-06, "loss": 0.9202, "step": 3153 }, { "epoch": 0.6467090424441255, "grad_norm": 1.2604871988296509, "learning_rate": 5.863983008511805e-06, "loss": 0.9324, "step": 3154 }, { "epoch": 0.6469140865286036, "grad_norm": 1.2733978033065796, "learning_rate": 5.857936800878767e-06, "loss": 0.8718, "step": 3155 }, { "epoch": 0.6471191306130818, "grad_norm": 1.3421127796173096, "learning_rate": 5.851892420479739e-06, "loss": 0.8742, "step": 3156 }, { "epoch": 0.64732417469756, "grad_norm": 1.1834404468536377, "learning_rate": 5.845849869981137e-06, "loss": 0.8978, "step": 3157 }, { "epoch": 0.6475292187820382, "grad_norm": 1.2466413974761963, "learning_rate": 5.839809152048581e-06, "loss": 0.8813, "step": 3158 }, { "epoch": 0.6477342628665163, "grad_norm": 1.2282079458236694, "learning_rate": 5.8337702693468824e-06, "loss": 0.9201, "step": 3159 }, { "epoch": 0.6479393069509944, "grad_norm": 1.2216660976409912, "learning_rate": 5.827733224540037e-06, "loss": 0.895, "step": 3160 }, { "epoch": 0.6481443510354726, "grad_norm": 1.2766233682632446, "learning_rate": 5.821698020291234e-06, "loss": 0.9184, "step": 3161 }, { "epoch": 0.6483493951199508, "grad_norm": 1.2284002304077148, "learning_rate": 5.815664659262845e-06, "loss": 0.9121, "step": 3162 }, { "epoch": 0.6485544392044289, "grad_norm": 1.3512905836105347, "learning_rate": 5.809633144116441e-06, "loss": 0.947, "step": 3163 }, { "epoch": 0.6487594832889071, "grad_norm": 1.1366990804672241, "learning_rate": 5.803603477512763e-06, "loss": 0.9023, "step": 3164 }, { "epoch": 0.6489645273733853, "grad_norm": 1.4352682828903198, "learning_rate": 5.797575662111756e-06, "loss": 0.8206, "step": 3165 }, { "epoch": 0.6491695714578635, "grad_norm": 1.4331482648849487, "learning_rate": 5.79154970057252e-06, "loss": 0.9333, "step": 3166 }, { "epoch": 0.6493746155423415, "grad_norm": 1.2059566974639893, "learning_rate": 5.78552559555337e-06, "loss": 0.9085, "step": 3167 }, { "epoch": 0.6495796596268197, "grad_norm": 1.2424942255020142, "learning_rate": 5.7795033497117724e-06, "loss": 0.8933, "step": 3168 }, { "epoch": 0.6497847037112979, "grad_norm": 1.194124698638916, "learning_rate": 5.7734829657044e-06, "loss": 0.9464, "step": 3169 }, { "epoch": 0.6499897477957761, "grad_norm": 1.258884072303772, "learning_rate": 5.7674644461870785e-06, "loss": 0.8333, "step": 3170 }, { "epoch": 0.6501947918802543, "grad_norm": 1.2437628507614136, "learning_rate": 5.761447793814832e-06, "loss": 0.9103, "step": 3171 }, { "epoch": 0.6503998359647324, "grad_norm": 1.3292579650878906, "learning_rate": 5.755433011241851e-06, "loss": 0.9794, "step": 3172 }, { "epoch": 0.6506048800492106, "grad_norm": 1.23637855052948, "learning_rate": 5.749420101121499e-06, "loss": 0.9343, "step": 3173 }, { "epoch": 0.6508099241336888, "grad_norm": 1.373829960823059, "learning_rate": 5.743409066106327e-06, "loss": 0.8492, "step": 3174 }, { "epoch": 0.651014968218167, "grad_norm": 1.246512770652771, "learning_rate": 5.7373999088480346e-06, "loss": 0.8816, "step": 3175 }, { "epoch": 0.651220012302645, "grad_norm": 1.3156170845031738, "learning_rate": 5.73139263199752e-06, "loss": 0.8976, "step": 3176 }, { "epoch": 0.6514250563871232, "grad_norm": 1.3070929050445557, "learning_rate": 5.725387238204831e-06, "loss": 0.8968, "step": 3177 }, { "epoch": 0.6516301004716014, "grad_norm": 1.3164646625518799, "learning_rate": 5.719383730119205e-06, "loss": 0.9058, "step": 3178 }, { "epoch": 0.6518351445560796, "grad_norm": 1.1996740102767944, "learning_rate": 5.713382110389021e-06, "loss": 0.8407, "step": 3179 }, { "epoch": 0.6520401886405577, "grad_norm": 1.3001128435134888, "learning_rate": 5.7073823816618505e-06, "loss": 0.9037, "step": 3180 }, { "epoch": 0.6522452327250359, "grad_norm": 1.2151257991790771, "learning_rate": 5.70138454658441e-06, "loss": 0.8843, "step": 3181 }, { "epoch": 0.6524502768095141, "grad_norm": 1.2330374717712402, "learning_rate": 5.695388607802603e-06, "loss": 0.8917, "step": 3182 }, { "epoch": 0.6526553208939923, "grad_norm": 1.2720532417297363, "learning_rate": 5.689394567961477e-06, "loss": 0.9515, "step": 3183 }, { "epoch": 0.6528603649784703, "grad_norm": 1.3151313066482544, "learning_rate": 5.683402429705249e-06, "loss": 0.8197, "step": 3184 }, { "epoch": 0.6530654090629485, "grad_norm": 1.3670965433120728, "learning_rate": 5.677412195677295e-06, "loss": 0.9585, "step": 3185 }, { "epoch": 0.6532704531474267, "grad_norm": 1.1895356178283691, "learning_rate": 5.671423868520158e-06, "loss": 0.8962, "step": 3186 }, { "epoch": 0.6534754972319049, "grad_norm": 1.2271708250045776, "learning_rate": 5.665437450875534e-06, "loss": 0.9672, "step": 3187 }, { "epoch": 0.653680541316383, "grad_norm": 1.2624585628509521, "learning_rate": 5.659452945384275e-06, "loss": 0.8904, "step": 3188 }, { "epoch": 0.6538855854008612, "grad_norm": 1.2269099950790405, "learning_rate": 5.653470354686392e-06, "loss": 0.922, "step": 3189 }, { "epoch": 0.6540906294853394, "grad_norm": 1.2099369764328003, "learning_rate": 5.64748968142105e-06, "loss": 0.8293, "step": 3190 }, { "epoch": 0.6542956735698175, "grad_norm": 1.2357794046401978, "learning_rate": 5.6415109282265745e-06, "loss": 0.9231, "step": 3191 }, { "epoch": 0.6545007176542956, "grad_norm": 1.2606943845748901, "learning_rate": 5.635534097740435e-06, "loss": 0.887, "step": 3192 }, { "epoch": 0.6547057617387738, "grad_norm": 1.2113698720932007, "learning_rate": 5.629559192599257e-06, "loss": 0.8752, "step": 3193 }, { "epoch": 0.654910805823252, "grad_norm": 1.2968538999557495, "learning_rate": 5.623586215438813e-06, "loss": 0.7874, "step": 3194 }, { "epoch": 0.6551158499077302, "grad_norm": 1.2210910320281982, "learning_rate": 5.617615168894036e-06, "loss": 0.9749, "step": 3195 }, { "epoch": 0.6553208939922083, "grad_norm": 1.206763505935669, "learning_rate": 5.611646055598995e-06, "loss": 0.8314, "step": 3196 }, { "epoch": 0.6555259380766865, "grad_norm": 1.146892786026001, "learning_rate": 5.605678878186911e-06, "loss": 0.87, "step": 3197 }, { "epoch": 0.6557309821611647, "grad_norm": 1.1646896600723267, "learning_rate": 5.599713639290148e-06, "loss": 0.8608, "step": 3198 }, { "epoch": 0.6559360262456428, "grad_norm": 1.1583210229873657, "learning_rate": 5.593750341540223e-06, "loss": 0.9325, "step": 3199 }, { "epoch": 0.6561410703301209, "grad_norm": 1.3441904783248901, "learning_rate": 5.587788987567785e-06, "loss": 0.8788, "step": 3200 }, { "epoch": 0.6563461144145991, "grad_norm": 1.2908639907836914, "learning_rate": 5.581829580002644e-06, "loss": 0.9239, "step": 3201 }, { "epoch": 0.6565511584990773, "grad_norm": 1.2190823554992676, "learning_rate": 5.575872121473722e-06, "loss": 0.8725, "step": 3202 }, { "epoch": 0.6567562025835555, "grad_norm": 1.2346464395523071, "learning_rate": 5.5699166146091126e-06, "loss": 0.8561, "step": 3203 }, { "epoch": 0.6569612466680337, "grad_norm": 1.3373113870620728, "learning_rate": 5.5639630620360265e-06, "loss": 0.8924, "step": 3204 }, { "epoch": 0.6571662907525118, "grad_norm": 1.3308669328689575, "learning_rate": 5.558011466380824e-06, "loss": 0.8711, "step": 3205 }, { "epoch": 0.65737133483699, "grad_norm": 1.215625524520874, "learning_rate": 5.552061830268995e-06, "loss": 0.8682, "step": 3206 }, { "epoch": 0.6575763789214681, "grad_norm": 1.3180807828903198, "learning_rate": 5.546114156325166e-06, "loss": 0.8948, "step": 3207 }, { "epoch": 0.6577814230059463, "grad_norm": 1.2737329006195068, "learning_rate": 5.540168447173108e-06, "loss": 0.951, "step": 3208 }, { "epoch": 0.6579864670904244, "grad_norm": 1.2264255285263062, "learning_rate": 5.534224705435707e-06, "loss": 0.911, "step": 3209 }, { "epoch": 0.6581915111749026, "grad_norm": 1.1796847581863403, "learning_rate": 5.528282933735005e-06, "loss": 0.8914, "step": 3210 }, { "epoch": 0.6583965552593808, "grad_norm": 1.214645504951477, "learning_rate": 5.5223431346921475e-06, "loss": 0.8577, "step": 3211 }, { "epoch": 0.658601599343859, "grad_norm": 1.3495696783065796, "learning_rate": 5.516405310927431e-06, "loss": 0.951, "step": 3212 }, { "epoch": 0.6588066434283371, "grad_norm": 1.2314900159835815, "learning_rate": 5.510469465060268e-06, "loss": 0.969, "step": 3213 }, { "epoch": 0.6590116875128152, "grad_norm": 1.2451070547103882, "learning_rate": 5.504535599709214e-06, "loss": 0.896, "step": 3214 }, { "epoch": 0.6592167315972934, "grad_norm": 1.2798986434936523, "learning_rate": 5.498603717491926e-06, "loss": 1.0009, "step": 3215 }, { "epoch": 0.6594217756817716, "grad_norm": 1.2745133638381958, "learning_rate": 5.492673821025212e-06, "loss": 0.9099, "step": 3216 }, { "epoch": 0.6596268197662497, "grad_norm": 1.389123558998108, "learning_rate": 5.4867459129249846e-06, "loss": 0.9504, "step": 3217 }, { "epoch": 0.6598318638507279, "grad_norm": 1.2612260580062866, "learning_rate": 5.480819995806296e-06, "loss": 0.985, "step": 3218 }, { "epoch": 0.6600369079352061, "grad_norm": 1.2682081460952759, "learning_rate": 5.474896072283306e-06, "loss": 0.8773, "step": 3219 }, { "epoch": 0.6602419520196843, "grad_norm": 1.2007625102996826, "learning_rate": 5.468974144969301e-06, "loss": 0.7958, "step": 3220 }, { "epoch": 0.6604469961041624, "grad_norm": 1.3273378610610962, "learning_rate": 5.463054216476687e-06, "loss": 0.9618, "step": 3221 }, { "epoch": 0.6606520401886405, "grad_norm": 1.2455986738204956, "learning_rate": 5.4571362894169795e-06, "loss": 0.959, "step": 3222 }, { "epoch": 0.6608570842731187, "grad_norm": 1.2563602924346924, "learning_rate": 5.451220366400832e-06, "loss": 0.8358, "step": 3223 }, { "epoch": 0.6610621283575969, "grad_norm": 1.2428721189498901, "learning_rate": 5.445306450037995e-06, "loss": 0.884, "step": 3224 }, { "epoch": 0.661267172442075, "grad_norm": 1.3157564401626587, "learning_rate": 5.4393945429373375e-06, "loss": 0.9195, "step": 3225 }, { "epoch": 0.6614722165265532, "grad_norm": 1.3110655546188354, "learning_rate": 5.4334846477068435e-06, "loss": 0.9199, "step": 3226 }, { "epoch": 0.6616772606110314, "grad_norm": 1.2684509754180908, "learning_rate": 5.427576766953615e-06, "loss": 0.8954, "step": 3227 }, { "epoch": 0.6618823046955096, "grad_norm": 1.3206162452697754, "learning_rate": 5.4216709032838595e-06, "loss": 0.9535, "step": 3228 }, { "epoch": 0.6620873487799877, "grad_norm": 1.2848538160324097, "learning_rate": 5.415767059302895e-06, "loss": 0.9511, "step": 3229 }, { "epoch": 0.6622923928644658, "grad_norm": 1.3545491695404053, "learning_rate": 5.409865237615144e-06, "loss": 0.8813, "step": 3230 }, { "epoch": 0.662497436948944, "grad_norm": 1.1606605052947998, "learning_rate": 5.403965440824151e-06, "loss": 0.8845, "step": 3231 }, { "epoch": 0.6627024810334222, "grad_norm": 1.2018237113952637, "learning_rate": 5.398067671532554e-06, "loss": 0.9135, "step": 3232 }, { "epoch": 0.6629075251179003, "grad_norm": 1.1399229764938354, "learning_rate": 5.392171932342101e-06, "loss": 0.8644, "step": 3233 }, { "epoch": 0.6631125692023785, "grad_norm": 1.277392864227295, "learning_rate": 5.386278225853643e-06, "loss": 0.9363, "step": 3234 }, { "epoch": 0.6633176132868567, "grad_norm": 1.143136739730835, "learning_rate": 5.380386554667139e-06, "loss": 0.8643, "step": 3235 }, { "epoch": 0.6635226573713349, "grad_norm": 1.2742886543273926, "learning_rate": 5.374496921381647e-06, "loss": 0.8599, "step": 3236 }, { "epoch": 0.663727701455813, "grad_norm": 1.2035337686538696, "learning_rate": 5.368609328595323e-06, "loss": 0.9144, "step": 3237 }, { "epoch": 0.6639327455402911, "grad_norm": 1.3217653036117554, "learning_rate": 5.362723778905427e-06, "loss": 0.8881, "step": 3238 }, { "epoch": 0.6641377896247693, "grad_norm": 1.2458962202072144, "learning_rate": 5.356840274908315e-06, "loss": 0.9285, "step": 3239 }, { "epoch": 0.6643428337092475, "grad_norm": 1.2019394636154175, "learning_rate": 5.350958819199444e-06, "loss": 0.9198, "step": 3240 }, { "epoch": 0.6645478777937257, "grad_norm": 1.3396494388580322, "learning_rate": 5.345079414373366e-06, "loss": 0.9021, "step": 3241 }, { "epoch": 0.6647529218782038, "grad_norm": 1.2840790748596191, "learning_rate": 5.339202063023727e-06, "loss": 0.9045, "step": 3242 }, { "epoch": 0.664957965962682, "grad_norm": 1.3257856369018555, "learning_rate": 5.333326767743263e-06, "loss": 1.0073, "step": 3243 }, { "epoch": 0.6651630100471602, "grad_norm": 1.2769262790679932, "learning_rate": 5.327453531123816e-06, "loss": 0.8785, "step": 3244 }, { "epoch": 0.6653680541316384, "grad_norm": 1.2759672403335571, "learning_rate": 5.321582355756304e-06, "loss": 0.9613, "step": 3245 }, { "epoch": 0.6655730982161164, "grad_norm": 1.2010349035263062, "learning_rate": 5.315713244230755e-06, "loss": 0.8344, "step": 3246 }, { "epoch": 0.6657781423005946, "grad_norm": 1.2056975364685059, "learning_rate": 5.309846199136258e-06, "loss": 0.9489, "step": 3247 }, { "epoch": 0.6659831863850728, "grad_norm": 1.2539863586425781, "learning_rate": 5.303981223061021e-06, "loss": 0.9384, "step": 3248 }, { "epoch": 0.666188230469551, "grad_norm": 1.1983355283737183, "learning_rate": 5.298118318592316e-06, "loss": 0.8366, "step": 3249 }, { "epoch": 0.6663932745540291, "grad_norm": 1.3411771059036255, "learning_rate": 5.292257488316524e-06, "loss": 0.947, "step": 3250 }, { "epoch": 0.6665983186385073, "grad_norm": 1.2047377824783325, "learning_rate": 5.286398734819081e-06, "loss": 0.8866, "step": 3251 }, { "epoch": 0.6668033627229855, "grad_norm": 1.3138834238052368, "learning_rate": 5.280542060684535e-06, "loss": 0.9033, "step": 3252 }, { "epoch": 0.6670084068074636, "grad_norm": 1.3732109069824219, "learning_rate": 5.2746874684965014e-06, "loss": 0.9113, "step": 3253 }, { "epoch": 0.6672134508919417, "grad_norm": 1.240618348121643, "learning_rate": 5.2688349608376775e-06, "loss": 0.8094, "step": 3254 }, { "epoch": 0.6674184949764199, "grad_norm": 1.2923345565795898, "learning_rate": 5.262984540289857e-06, "loss": 0.9284, "step": 3255 }, { "epoch": 0.6676235390608981, "grad_norm": 1.2307615280151367, "learning_rate": 5.257136209433884e-06, "loss": 0.8677, "step": 3256 }, { "epoch": 0.6678285831453763, "grad_norm": 1.2096288204193115, "learning_rate": 5.2512899708497086e-06, "loss": 0.8494, "step": 3257 }, { "epoch": 0.6680336272298544, "grad_norm": 1.2931607961654663, "learning_rate": 5.24544582711634e-06, "loss": 0.9214, "step": 3258 }, { "epoch": 0.6682386713143326, "grad_norm": 1.2468754053115845, "learning_rate": 5.23960378081188e-06, "loss": 0.8641, "step": 3259 }, { "epoch": 0.6684437153988108, "grad_norm": 1.255079984664917, "learning_rate": 5.233763834513479e-06, "loss": 0.8769, "step": 3260 }, { "epoch": 0.668648759483289, "grad_norm": 1.2788540124893188, "learning_rate": 5.227925990797389e-06, "loss": 0.9294, "step": 3261 }, { "epoch": 0.668853803567767, "grad_norm": 1.3060256242752075, "learning_rate": 5.222090252238916e-06, "loss": 0.8521, "step": 3262 }, { "epoch": 0.6690588476522452, "grad_norm": 1.1982966661453247, "learning_rate": 5.21625662141245e-06, "loss": 0.9238, "step": 3263 }, { "epoch": 0.6692638917367234, "grad_norm": 1.2980952262878418, "learning_rate": 5.2104251008914405e-06, "loss": 0.975, "step": 3264 }, { "epoch": 0.6694689358212016, "grad_norm": 1.3062714338302612, "learning_rate": 5.204595693248413e-06, "loss": 0.9057, "step": 3265 }, { "epoch": 0.6696739799056797, "grad_norm": 1.293008804321289, "learning_rate": 5.198768401054952e-06, "loss": 0.9194, "step": 3266 }, { "epoch": 0.6698790239901579, "grad_norm": 1.4094657897949219, "learning_rate": 5.192943226881724e-06, "loss": 0.9261, "step": 3267 }, { "epoch": 0.670084068074636, "grad_norm": 1.2192128896713257, "learning_rate": 5.187120173298448e-06, "loss": 0.8859, "step": 3268 }, { "epoch": 0.6702891121591142, "grad_norm": 1.3364628553390503, "learning_rate": 5.181299242873912e-06, "loss": 0.8414, "step": 3269 }, { "epoch": 0.6704941562435923, "grad_norm": 1.2269033193588257, "learning_rate": 5.175480438175966e-06, "loss": 0.9023, "step": 3270 }, { "epoch": 0.6706992003280705, "grad_norm": 1.2319475412368774, "learning_rate": 5.169663761771522e-06, "loss": 0.8252, "step": 3271 }, { "epoch": 0.6709042444125487, "grad_norm": 1.194408893585205, "learning_rate": 5.163849216226562e-06, "loss": 0.9138, "step": 3272 }, { "epoch": 0.6711092884970269, "grad_norm": 1.4509514570236206, "learning_rate": 5.158036804106114e-06, "loss": 0.9834, "step": 3273 }, { "epoch": 0.671314332581505, "grad_norm": 1.2318764925003052, "learning_rate": 5.152226527974273e-06, "loss": 0.9427, "step": 3274 }, { "epoch": 0.6715193766659832, "grad_norm": 1.2911465167999268, "learning_rate": 5.1464183903941875e-06, "loss": 0.9407, "step": 3275 }, { "epoch": 0.6717244207504613, "grad_norm": 1.1304625272750854, "learning_rate": 5.140612393928073e-06, "loss": 0.9184, "step": 3276 }, { "epoch": 0.6719294648349395, "grad_norm": 1.2453575134277344, "learning_rate": 5.134808541137183e-06, "loss": 0.8597, "step": 3277 }, { "epoch": 0.6721345089194177, "grad_norm": 1.354857087135315, "learning_rate": 5.129006834581848e-06, "loss": 0.9911, "step": 3278 }, { "epoch": 0.6723395530038958, "grad_norm": 1.2863750457763672, "learning_rate": 5.123207276821422e-06, "loss": 0.7669, "step": 3279 }, { "epoch": 0.672544597088374, "grad_norm": 1.2181174755096436, "learning_rate": 5.117409870414343e-06, "loss": 0.9663, "step": 3280 }, { "epoch": 0.6727496411728522, "grad_norm": 1.1555238962173462, "learning_rate": 5.111614617918072e-06, "loss": 0.8364, "step": 3281 }, { "epoch": 0.6729546852573304, "grad_norm": 1.2672007083892822, "learning_rate": 5.105821521889147e-06, "loss": 0.9482, "step": 3282 }, { "epoch": 0.6731597293418085, "grad_norm": 1.2789483070373535, "learning_rate": 5.100030584883127e-06, "loss": 0.8867, "step": 3283 }, { "epoch": 0.6733647734262866, "grad_norm": 1.3270933628082275, "learning_rate": 5.094241809454639e-06, "loss": 0.9248, "step": 3284 }, { "epoch": 0.6735698175107648, "grad_norm": 1.3755313158035278, "learning_rate": 5.088455198157347e-06, "loss": 0.9676, "step": 3285 }, { "epoch": 0.673774861595243, "grad_norm": 1.3114084005355835, "learning_rate": 5.082670753543961e-06, "loss": 0.9522, "step": 3286 }, { "epoch": 0.6739799056797211, "grad_norm": 1.3444198369979858, "learning_rate": 5.076888478166247e-06, "loss": 0.9383, "step": 3287 }, { "epoch": 0.6741849497641993, "grad_norm": 1.2526172399520874, "learning_rate": 5.071108374574987e-06, "loss": 0.7871, "step": 3288 }, { "epoch": 0.6743899938486775, "grad_norm": 1.1363283395767212, "learning_rate": 5.065330445320036e-06, "loss": 0.8902, "step": 3289 }, { "epoch": 0.6745950379331557, "grad_norm": 1.3366637229919434, "learning_rate": 5.0595546929502644e-06, "loss": 0.8807, "step": 3290 }, { "epoch": 0.6748000820176338, "grad_norm": 1.2622227668762207, "learning_rate": 5.053781120013609e-06, "loss": 0.8779, "step": 3291 }, { "epoch": 0.6750051261021119, "grad_norm": 1.310050368309021, "learning_rate": 5.048009729057012e-06, "loss": 0.7907, "step": 3292 }, { "epoch": 0.6752101701865901, "grad_norm": 1.2842274904251099, "learning_rate": 5.0422405226264825e-06, "loss": 0.9292, "step": 3293 }, { "epoch": 0.6754152142710683, "grad_norm": 1.3222262859344482, "learning_rate": 5.0364735032670476e-06, "loss": 0.8826, "step": 3294 }, { "epoch": 0.6756202583555464, "grad_norm": 1.2269796133041382, "learning_rate": 5.0307086735227815e-06, "loss": 0.9487, "step": 3295 }, { "epoch": 0.6758253024400246, "grad_norm": 1.3658783435821533, "learning_rate": 5.0249460359367864e-06, "loss": 0.9296, "step": 3296 }, { "epoch": 0.6760303465245028, "grad_norm": 1.2092376947402954, "learning_rate": 5.0191855930511946e-06, "loss": 0.8881, "step": 3297 }, { "epoch": 0.676235390608981, "grad_norm": 1.3363207578659058, "learning_rate": 5.0134273474071725e-06, "loss": 0.9667, "step": 3298 }, { "epoch": 0.676440434693459, "grad_norm": 1.282605767250061, "learning_rate": 5.007671301544925e-06, "loss": 0.8922, "step": 3299 }, { "epoch": 0.6766454787779372, "grad_norm": 1.244703769683838, "learning_rate": 5.001917458003678e-06, "loss": 0.8744, "step": 3300 }, { "epoch": 0.6768505228624154, "grad_norm": 1.2928826808929443, "learning_rate": 4.9961658193216865e-06, "loss": 0.8947, "step": 3301 }, { "epoch": 0.6770555669468936, "grad_norm": 1.173845648765564, "learning_rate": 4.990416388036233e-06, "loss": 0.8909, "step": 3302 }, { "epoch": 0.6772606110313717, "grad_norm": 1.1477001905441284, "learning_rate": 4.984669166683627e-06, "loss": 0.8747, "step": 3303 }, { "epoch": 0.6774656551158499, "grad_norm": 1.1409227848052979, "learning_rate": 4.978924157799208e-06, "loss": 0.8658, "step": 3304 }, { "epoch": 0.6776706992003281, "grad_norm": 1.3570294380187988, "learning_rate": 4.973181363917333e-06, "loss": 0.8951, "step": 3305 }, { "epoch": 0.6778757432848063, "grad_norm": 1.2384833097457886, "learning_rate": 4.967440787571384e-06, "loss": 0.8556, "step": 3306 }, { "epoch": 0.6780807873692843, "grad_norm": 1.330822467803955, "learning_rate": 4.961702431293759e-06, "loss": 0.82, "step": 3307 }, { "epoch": 0.6782858314537625, "grad_norm": 1.2399476766586304, "learning_rate": 4.955966297615893e-06, "loss": 0.9007, "step": 3308 }, { "epoch": 0.6784908755382407, "grad_norm": 1.3049287796020508, "learning_rate": 4.950232389068224e-06, "loss": 0.8759, "step": 3309 }, { "epoch": 0.6786959196227189, "grad_norm": 1.2623074054718018, "learning_rate": 4.944500708180215e-06, "loss": 0.925, "step": 3310 }, { "epoch": 0.6789009637071971, "grad_norm": 1.1566534042358398, "learning_rate": 4.9387712574803425e-06, "loss": 0.9335, "step": 3311 }, { "epoch": 0.6791060077916752, "grad_norm": 1.2608753442764282, "learning_rate": 4.933044039496107e-06, "loss": 0.874, "step": 3312 }, { "epoch": 0.6793110518761534, "grad_norm": 1.3596234321594238, "learning_rate": 4.927319056754016e-06, "loss": 0.9582, "step": 3313 }, { "epoch": 0.6795160959606316, "grad_norm": 1.2340842485427856, "learning_rate": 4.921596311779603e-06, "loss": 0.8825, "step": 3314 }, { "epoch": 0.6797211400451098, "grad_norm": 1.1245440244674683, "learning_rate": 4.91587580709739e-06, "loss": 0.8319, "step": 3315 }, { "epoch": 0.6799261841295878, "grad_norm": 1.2463171482086182, "learning_rate": 4.910157545230939e-06, "loss": 0.8987, "step": 3316 }, { "epoch": 0.680131228214066, "grad_norm": 1.2633835077285767, "learning_rate": 4.904441528702806e-06, "loss": 0.9288, "step": 3317 }, { "epoch": 0.6803362722985442, "grad_norm": 1.2422699928283691, "learning_rate": 4.898727760034558e-06, "loss": 0.9027, "step": 3318 }, { "epoch": 0.6805413163830224, "grad_norm": 1.1978085041046143, "learning_rate": 4.893016241746778e-06, "loss": 0.8929, "step": 3319 }, { "epoch": 0.6807463604675005, "grad_norm": 1.2561640739440918, "learning_rate": 4.887306976359041e-06, "loss": 0.8738, "step": 3320 }, { "epoch": 0.6809514045519787, "grad_norm": 1.2534548044204712, "learning_rate": 4.88159996638995e-06, "loss": 0.8658, "step": 3321 }, { "epoch": 0.6811564486364569, "grad_norm": 1.2981278896331787, "learning_rate": 4.875895214357093e-06, "loss": 0.9007, "step": 3322 }, { "epoch": 0.681361492720935, "grad_norm": 1.2696677446365356, "learning_rate": 4.87019272277708e-06, "loss": 0.8385, "step": 3323 }, { "epoch": 0.6815665368054131, "grad_norm": 1.1793570518493652, "learning_rate": 4.8644924941655e-06, "loss": 0.8299, "step": 3324 }, { "epoch": 0.6817715808898913, "grad_norm": 1.2336207628250122, "learning_rate": 4.8587945310369685e-06, "loss": 0.8234, "step": 3325 }, { "epoch": 0.6819766249743695, "grad_norm": 1.3057116270065308, "learning_rate": 4.853098835905085e-06, "loss": 0.9208, "step": 3326 }, { "epoch": 0.6821816690588477, "grad_norm": 1.2929192781448364, "learning_rate": 4.847405411282462e-06, "loss": 0.9451, "step": 3327 }, { "epoch": 0.6823867131433258, "grad_norm": 1.2362284660339355, "learning_rate": 4.841714259680691e-06, "loss": 0.893, "step": 3328 }, { "epoch": 0.682591757227804, "grad_norm": 1.2348911762237549, "learning_rate": 4.836025383610382e-06, "loss": 0.9099, "step": 3329 }, { "epoch": 0.6827968013122822, "grad_norm": 1.2837132215499878, "learning_rate": 4.8303387855811245e-06, "loss": 0.8972, "step": 3330 }, { "epoch": 0.6830018453967603, "grad_norm": 1.2487074136734009, "learning_rate": 4.824654468101519e-06, "loss": 0.9522, "step": 3331 }, { "epoch": 0.6832068894812384, "grad_norm": 1.2492314577102661, "learning_rate": 4.818972433679145e-06, "loss": 0.8342, "step": 3332 }, { "epoch": 0.6834119335657166, "grad_norm": 1.1988846063613892, "learning_rate": 4.813292684820583e-06, "loss": 0.9925, "step": 3333 }, { "epoch": 0.6836169776501948, "grad_norm": 1.1627423763275146, "learning_rate": 4.8076152240314026e-06, "loss": 0.9028, "step": 3334 }, { "epoch": 0.683822021734673, "grad_norm": 1.2207462787628174, "learning_rate": 4.801940053816159e-06, "loss": 0.8908, "step": 3335 }, { "epoch": 0.6840270658191511, "grad_norm": 1.30246102809906, "learning_rate": 4.796267176678416e-06, "loss": 0.8962, "step": 3336 }, { "epoch": 0.6842321099036293, "grad_norm": 1.1943650245666504, "learning_rate": 4.790596595120699e-06, "loss": 0.9053, "step": 3337 }, { "epoch": 0.6844371539881074, "grad_norm": 1.2183176279067993, "learning_rate": 4.784928311644541e-06, "loss": 0.8315, "step": 3338 }, { "epoch": 0.6846421980725856, "grad_norm": 1.2736810445785522, "learning_rate": 4.7792623287504516e-06, "loss": 0.8649, "step": 3339 }, { "epoch": 0.6848472421570637, "grad_norm": 1.2421822547912598, "learning_rate": 4.773598648937932e-06, "loss": 0.826, "step": 3340 }, { "epoch": 0.6850522862415419, "grad_norm": 1.179642677307129, "learning_rate": 4.7679372747054616e-06, "loss": 0.9053, "step": 3341 }, { "epoch": 0.6852573303260201, "grad_norm": 1.2188581228256226, "learning_rate": 4.762278208550505e-06, "loss": 0.8741, "step": 3342 }, { "epoch": 0.6854623744104983, "grad_norm": 1.3003910779953003, "learning_rate": 4.756621452969504e-06, "loss": 0.8923, "step": 3343 }, { "epoch": 0.6856674184949764, "grad_norm": 1.2821911573410034, "learning_rate": 4.7509670104578945e-06, "loss": 0.8602, "step": 3344 }, { "epoch": 0.6858724625794546, "grad_norm": 1.2943722009658813, "learning_rate": 4.745314883510077e-06, "loss": 0.8863, "step": 3345 }, { "epoch": 0.6860775066639327, "grad_norm": 1.3422447443008423, "learning_rate": 4.7396650746194384e-06, "loss": 0.9289, "step": 3346 }, { "epoch": 0.6862825507484109, "grad_norm": 1.3907805681228638, "learning_rate": 4.734017586278337e-06, "loss": 0.9521, "step": 3347 }, { "epoch": 0.6864875948328891, "grad_norm": 1.249023199081421, "learning_rate": 4.728372420978119e-06, "loss": 0.917, "step": 3348 }, { "epoch": 0.6866926389173672, "grad_norm": 1.2805886268615723, "learning_rate": 4.7227295812090945e-06, "loss": 0.8504, "step": 3349 }, { "epoch": 0.6868976830018454, "grad_norm": 1.279034972190857, "learning_rate": 4.717089069460552e-06, "loss": 0.9114, "step": 3350 }, { "epoch": 0.6871027270863236, "grad_norm": 1.1635698080062866, "learning_rate": 4.711450888220752e-06, "loss": 0.87, "step": 3351 }, { "epoch": 0.6873077711708018, "grad_norm": 1.2100483179092407, "learning_rate": 4.7058150399769245e-06, "loss": 0.9318, "step": 3352 }, { "epoch": 0.6875128152552799, "grad_norm": 1.26199209690094, "learning_rate": 4.700181527215279e-06, "loss": 0.859, "step": 3353 }, { "epoch": 0.687717859339758, "grad_norm": 1.13788640499115, "learning_rate": 4.694550352420989e-06, "loss": 0.8791, "step": 3354 }, { "epoch": 0.6879229034242362, "grad_norm": 1.266270399093628, "learning_rate": 4.688921518078194e-06, "loss": 0.8375, "step": 3355 }, { "epoch": 0.6881279475087144, "grad_norm": 1.1557801961898804, "learning_rate": 4.683295026670001e-06, "loss": 0.8338, "step": 3356 }, { "epoch": 0.6883329915931925, "grad_norm": 1.1886496543884277, "learning_rate": 4.677670880678493e-06, "loss": 0.8934, "step": 3357 }, { "epoch": 0.6885380356776707, "grad_norm": 1.1793218851089478, "learning_rate": 4.672049082584705e-06, "loss": 0.9018, "step": 3358 }, { "epoch": 0.6887430797621489, "grad_norm": 1.2535368204116821, "learning_rate": 4.666429634868651e-06, "loss": 0.8723, "step": 3359 }, { "epoch": 0.6889481238466271, "grad_norm": 1.1660741567611694, "learning_rate": 4.66081254000929e-06, "loss": 0.8663, "step": 3360 }, { "epoch": 0.6891531679311051, "grad_norm": 1.2769930362701416, "learning_rate": 4.65519780048456e-06, "loss": 0.899, "step": 3361 }, { "epoch": 0.6893582120155833, "grad_norm": 1.2658497095108032, "learning_rate": 4.649585418771348e-06, "loss": 0.8871, "step": 3362 }, { "epoch": 0.6895632561000615, "grad_norm": 1.2910473346710205, "learning_rate": 4.643975397345515e-06, "loss": 0.8191, "step": 3363 }, { "epoch": 0.6897683001845397, "grad_norm": 1.2554296255111694, "learning_rate": 4.638367738681858e-06, "loss": 0.8284, "step": 3364 }, { "epoch": 0.6899733442690178, "grad_norm": 1.2548694610595703, "learning_rate": 4.6327624452541564e-06, "loss": 0.9101, "step": 3365 }, { "epoch": 0.690178388353496, "grad_norm": 1.2045921087265015, "learning_rate": 4.627159519535131e-06, "loss": 0.8854, "step": 3366 }, { "epoch": 0.6903834324379742, "grad_norm": 1.2467166185379028, "learning_rate": 4.621558963996458e-06, "loss": 0.9171, "step": 3367 }, { "epoch": 0.6905884765224524, "grad_norm": 1.3083637952804565, "learning_rate": 4.615960781108785e-06, "loss": 0.9361, "step": 3368 }, { "epoch": 0.6907935206069304, "grad_norm": 1.1740854978561401, "learning_rate": 4.6103649733416844e-06, "loss": 0.9364, "step": 3369 }, { "epoch": 0.6909985646914086, "grad_norm": 1.2976608276367188, "learning_rate": 4.604771543163706e-06, "loss": 0.8127, "step": 3370 }, { "epoch": 0.6912036087758868, "grad_norm": 1.174237847328186, "learning_rate": 4.599180493042337e-06, "loss": 0.8785, "step": 3371 }, { "epoch": 0.691408652860365, "grad_norm": 1.278210163116455, "learning_rate": 4.593591825444028e-06, "loss": 0.9021, "step": 3372 }, { "epoch": 0.6916136969448431, "grad_norm": 1.2943484783172607, "learning_rate": 4.5880055428341554e-06, "loss": 0.8993, "step": 3373 }, { "epoch": 0.6918187410293213, "grad_norm": 1.285488247871399, "learning_rate": 4.582421647677069e-06, "loss": 0.923, "step": 3374 }, { "epoch": 0.6920237851137995, "grad_norm": 1.1878881454467773, "learning_rate": 4.576840142436045e-06, "loss": 0.9636, "step": 3375 }, { "epoch": 0.6922288291982777, "grad_norm": 1.2973650693893433, "learning_rate": 4.571261029573324e-06, "loss": 0.8656, "step": 3376 }, { "epoch": 0.6924338732827557, "grad_norm": 1.2634342908859253, "learning_rate": 4.565684311550077e-06, "loss": 0.8359, "step": 3377 }, { "epoch": 0.6926389173672339, "grad_norm": 1.1639786958694458, "learning_rate": 4.560109990826423e-06, "loss": 0.8617, "step": 3378 }, { "epoch": 0.6928439614517121, "grad_norm": 1.212141513824463, "learning_rate": 4.554538069861419e-06, "loss": 0.9377, "step": 3379 }, { "epoch": 0.6930490055361903, "grad_norm": 1.2353285551071167, "learning_rate": 4.548968551113076e-06, "loss": 0.873, "step": 3380 }, { "epoch": 0.6932540496206684, "grad_norm": 1.257918357849121, "learning_rate": 4.543401437038335e-06, "loss": 0.8667, "step": 3381 }, { "epoch": 0.6934590937051466, "grad_norm": 1.220855951309204, "learning_rate": 4.537836730093077e-06, "loss": 0.8671, "step": 3382 }, { "epoch": 0.6936641377896248, "grad_norm": 1.3383933305740356, "learning_rate": 4.532274432732122e-06, "loss": 0.9413, "step": 3383 }, { "epoch": 0.693869181874103, "grad_norm": 1.3055278062820435, "learning_rate": 4.526714547409224e-06, "loss": 0.9223, "step": 3384 }, { "epoch": 0.6940742259585811, "grad_norm": 1.3579665422439575, "learning_rate": 4.521157076577085e-06, "loss": 0.8668, "step": 3385 }, { "epoch": 0.6942792700430592, "grad_norm": 1.3042564392089844, "learning_rate": 4.5156020226873295e-06, "loss": 0.8109, "step": 3386 }, { "epoch": 0.6944843141275374, "grad_norm": 1.2912441492080688, "learning_rate": 4.510049388190518e-06, "loss": 0.9352, "step": 3387 }, { "epoch": 0.6946893582120156, "grad_norm": 1.2498762607574463, "learning_rate": 4.504499175536145e-06, "loss": 0.9616, "step": 3388 }, { "epoch": 0.6948944022964938, "grad_norm": 1.232717752456665, "learning_rate": 4.49895138717264e-06, "loss": 0.9374, "step": 3389 }, { "epoch": 0.6950994463809719, "grad_norm": 1.2275311946868896, "learning_rate": 4.49340602554736e-06, "loss": 0.8851, "step": 3390 }, { "epoch": 0.6953044904654501, "grad_norm": 1.2246965169906616, "learning_rate": 4.4878630931065895e-06, "loss": 0.9409, "step": 3391 }, { "epoch": 0.6955095345499283, "grad_norm": 1.2348085641860962, "learning_rate": 4.482322592295541e-06, "loss": 0.8751, "step": 3392 }, { "epoch": 0.6957145786344064, "grad_norm": 1.2663235664367676, "learning_rate": 4.476784525558363e-06, "loss": 0.9065, "step": 3393 }, { "epoch": 0.6959196227188845, "grad_norm": 1.2504816055297852, "learning_rate": 4.471248895338121e-06, "loss": 0.8956, "step": 3394 }, { "epoch": 0.6961246668033627, "grad_norm": 1.2609198093414307, "learning_rate": 4.465715704076807e-06, "loss": 0.7892, "step": 3395 }, { "epoch": 0.6963297108878409, "grad_norm": 1.2139837741851807, "learning_rate": 4.460184954215339e-06, "loss": 0.9265, "step": 3396 }, { "epoch": 0.6965347549723191, "grad_norm": 1.1214593648910522, "learning_rate": 4.454656648193559e-06, "loss": 0.8843, "step": 3397 }, { "epoch": 0.6967397990567972, "grad_norm": 1.2105846405029297, "learning_rate": 4.4491307884502325e-06, "loss": 0.8901, "step": 3398 }, { "epoch": 0.6969448431412754, "grad_norm": 1.2717148065567017, "learning_rate": 4.443607377423037e-06, "loss": 0.8785, "step": 3399 }, { "epoch": 0.6971498872257536, "grad_norm": 1.1825331449508667, "learning_rate": 4.438086417548579e-06, "loss": 0.8875, "step": 3400 }, { "epoch": 0.6973549313102317, "grad_norm": 1.2920695543289185, "learning_rate": 4.432567911262377e-06, "loss": 0.9492, "step": 3401 }, { "epoch": 0.6975599753947098, "grad_norm": 1.154603123664856, "learning_rate": 4.427051860998877e-06, "loss": 0.8872, "step": 3402 }, { "epoch": 0.697765019479188, "grad_norm": 1.2408299446105957, "learning_rate": 4.421538269191427e-06, "loss": 0.8883, "step": 3403 }, { "epoch": 0.6979700635636662, "grad_norm": 1.1150424480438232, "learning_rate": 4.41602713827231e-06, "loss": 0.914, "step": 3404 }, { "epoch": 0.6981751076481444, "grad_norm": 1.173747181892395, "learning_rate": 4.4105184706726965e-06, "loss": 0.8082, "step": 3405 }, { "epoch": 0.6983801517326225, "grad_norm": 1.2631314992904663, "learning_rate": 4.4050122688227e-06, "loss": 0.9372, "step": 3406 }, { "epoch": 0.6985851958171007, "grad_norm": 1.234009861946106, "learning_rate": 4.399508535151321e-06, "loss": 0.9144, "step": 3407 }, { "epoch": 0.6987902399015788, "grad_norm": 1.2340959310531616, "learning_rate": 4.394007272086495e-06, "loss": 0.8036, "step": 3408 }, { "epoch": 0.698995283986057, "grad_norm": 1.194852352142334, "learning_rate": 4.38850848205504e-06, "loss": 0.9102, "step": 3409 }, { "epoch": 0.6992003280705351, "grad_norm": 1.2281922101974487, "learning_rate": 4.383012167482707e-06, "loss": 0.8656, "step": 3410 }, { "epoch": 0.6994053721550133, "grad_norm": 1.2564644813537598, "learning_rate": 4.377518330794143e-06, "loss": 0.8723, "step": 3411 }, { "epoch": 0.6996104162394915, "grad_norm": 1.239774227142334, "learning_rate": 4.372026974412907e-06, "loss": 0.942, "step": 3412 }, { "epoch": 0.6998154603239697, "grad_norm": 1.243910789489746, "learning_rate": 4.366538100761462e-06, "loss": 0.9108, "step": 3413 }, { "epoch": 0.7000205044084478, "grad_norm": 1.3127655982971191, "learning_rate": 4.361051712261173e-06, "loss": 0.8806, "step": 3414 }, { "epoch": 0.700225548492926, "grad_norm": 1.3899098634719849, "learning_rate": 4.355567811332311e-06, "loss": 0.8699, "step": 3415 }, { "epoch": 0.7004305925774041, "grad_norm": 1.3664450645446777, "learning_rate": 4.350086400394047e-06, "loss": 0.8784, "step": 3416 }, { "epoch": 0.7006356366618823, "grad_norm": 1.2590869665145874, "learning_rate": 4.344607481864466e-06, "loss": 0.9184, "step": 3417 }, { "epoch": 0.7008406807463605, "grad_norm": 1.2162290811538696, "learning_rate": 4.339131058160531e-06, "loss": 0.8873, "step": 3418 }, { "epoch": 0.7010457248308386, "grad_norm": 1.269956111907959, "learning_rate": 4.333657131698127e-06, "loss": 0.986, "step": 3419 }, { "epoch": 0.7012507689153168, "grad_norm": 1.2288475036621094, "learning_rate": 4.32818570489202e-06, "loss": 0.9298, "step": 3420 }, { "epoch": 0.701455812999795, "grad_norm": 1.320900321006775, "learning_rate": 4.3227167801558875e-06, "loss": 0.9073, "step": 3421 }, { "epoch": 0.7016608570842732, "grad_norm": 1.2205588817596436, "learning_rate": 4.317250359902295e-06, "loss": 0.8734, "step": 3422 }, { "epoch": 0.7018659011687512, "grad_norm": 1.2086901664733887, "learning_rate": 4.311786446542703e-06, "loss": 0.9085, "step": 3423 }, { "epoch": 0.7020709452532294, "grad_norm": 1.3793911933898926, "learning_rate": 4.306325042487465e-06, "loss": 0.9649, "step": 3424 }, { "epoch": 0.7022759893377076, "grad_norm": 1.3397585153579712, "learning_rate": 4.300866150145837e-06, "loss": 0.9099, "step": 3425 }, { "epoch": 0.7024810334221858, "grad_norm": 1.4566906690597534, "learning_rate": 4.295409771925958e-06, "loss": 0.9805, "step": 3426 }, { "epoch": 0.7026860775066639, "grad_norm": 1.2581124305725098, "learning_rate": 4.2899559102348585e-06, "loss": 0.9237, "step": 3427 }, { "epoch": 0.7028911215911421, "grad_norm": 1.1612203121185303, "learning_rate": 4.284504567478459e-06, "loss": 0.9441, "step": 3428 }, { "epoch": 0.7030961656756203, "grad_norm": 1.1667400598526, "learning_rate": 4.2790557460615755e-06, "loss": 0.9154, "step": 3429 }, { "epoch": 0.7033012097600985, "grad_norm": 1.2664512395858765, "learning_rate": 4.273609448387905e-06, "loss": 0.8968, "step": 3430 }, { "epoch": 0.7035062538445765, "grad_norm": 1.232726812362671, "learning_rate": 4.268165676860032e-06, "loss": 0.9805, "step": 3431 }, { "epoch": 0.7037112979290547, "grad_norm": 1.263300895690918, "learning_rate": 4.262724433879427e-06, "loss": 0.8777, "step": 3432 }, { "epoch": 0.7039163420135329, "grad_norm": 1.1488949060440063, "learning_rate": 4.257285721846443e-06, "loss": 0.8897, "step": 3433 }, { "epoch": 0.7041213860980111, "grad_norm": 1.2010841369628906, "learning_rate": 4.251849543160324e-06, "loss": 0.8487, "step": 3434 }, { "epoch": 0.7043264301824892, "grad_norm": 1.2664588689804077, "learning_rate": 4.246415900219192e-06, "loss": 0.9216, "step": 3435 }, { "epoch": 0.7045314742669674, "grad_norm": 1.4295991659164429, "learning_rate": 4.240984795420044e-06, "loss": 0.9802, "step": 3436 }, { "epoch": 0.7047365183514456, "grad_norm": 1.2671430110931396, "learning_rate": 4.235556231158765e-06, "loss": 0.9026, "step": 3437 }, { "epoch": 0.7049415624359238, "grad_norm": 1.1657681465148926, "learning_rate": 4.230130209830122e-06, "loss": 0.858, "step": 3438 }, { "epoch": 0.7051466065204018, "grad_norm": 1.2645511627197266, "learning_rate": 4.2247067338277495e-06, "loss": 0.9115, "step": 3439 }, { "epoch": 0.70535165060488, "grad_norm": 1.1903870105743408, "learning_rate": 4.219285805544173e-06, "loss": 0.8879, "step": 3440 }, { "epoch": 0.7055566946893582, "grad_norm": 1.2107454538345337, "learning_rate": 4.213867427370776e-06, "loss": 0.8742, "step": 3441 }, { "epoch": 0.7057617387738364, "grad_norm": 1.1954418420791626, "learning_rate": 4.208451601697836e-06, "loss": 0.9411, "step": 3442 }, { "epoch": 0.7059667828583145, "grad_norm": 1.1857166290283203, "learning_rate": 4.203038330914489e-06, "loss": 0.8965, "step": 3443 }, { "epoch": 0.7061718269427927, "grad_norm": 1.2216408252716064, "learning_rate": 4.1976276174087594e-06, "loss": 0.9191, "step": 3444 }, { "epoch": 0.7063768710272709, "grad_norm": 1.3475414514541626, "learning_rate": 4.192219463567529e-06, "loss": 0.9237, "step": 3445 }, { "epoch": 0.7065819151117491, "grad_norm": 1.1962547302246094, "learning_rate": 4.1868138717765584e-06, "loss": 0.8106, "step": 3446 }, { "epoch": 0.7067869591962271, "grad_norm": 1.3247138261795044, "learning_rate": 4.181410844420473e-06, "loss": 0.9443, "step": 3447 }, { "epoch": 0.7069920032807053, "grad_norm": 1.258646845817566, "learning_rate": 4.1760103838827705e-06, "loss": 0.9325, "step": 3448 }, { "epoch": 0.7071970473651835, "grad_norm": 1.1915485858917236, "learning_rate": 4.1706124925458225e-06, "loss": 0.8479, "step": 3449 }, { "epoch": 0.7074020914496617, "grad_norm": 1.2549489736557007, "learning_rate": 4.165217172790847e-06, "loss": 0.8445, "step": 3450 }, { "epoch": 0.7076071355341398, "grad_norm": 1.3474156856536865, "learning_rate": 4.159824426997954e-06, "loss": 0.8751, "step": 3451 }, { "epoch": 0.707812179618618, "grad_norm": 1.3088034391403198, "learning_rate": 4.154434257546095e-06, "loss": 0.9424, "step": 3452 }, { "epoch": 0.7080172237030962, "grad_norm": 1.1982250213623047, "learning_rate": 4.149046666813103e-06, "loss": 0.8882, "step": 3453 }, { "epoch": 0.7082222677875744, "grad_norm": 1.2810993194580078, "learning_rate": 4.14366165717566e-06, "loss": 0.9274, "step": 3454 }, { "epoch": 0.7084273118720525, "grad_norm": 1.2483958005905151, "learning_rate": 4.138279231009319e-06, "loss": 0.9011, "step": 3455 }, { "epoch": 0.7086323559565306, "grad_norm": 1.2845927476882935, "learning_rate": 4.1328993906884815e-06, "loss": 0.9168, "step": 3456 }, { "epoch": 0.7088374000410088, "grad_norm": 1.292675256729126, "learning_rate": 4.127522138586424e-06, "loss": 0.9266, "step": 3457 }, { "epoch": 0.709042444125487, "grad_norm": 1.191215991973877, "learning_rate": 4.12214747707527e-06, "loss": 0.8676, "step": 3458 }, { "epoch": 0.7092474882099652, "grad_norm": 1.2627748250961304, "learning_rate": 4.116775408526002e-06, "loss": 0.9126, "step": 3459 }, { "epoch": 0.7094525322944433, "grad_norm": 1.2293368577957153, "learning_rate": 4.111405935308458e-06, "loss": 0.9058, "step": 3460 }, { "epoch": 0.7096575763789215, "grad_norm": 1.3042351007461548, "learning_rate": 4.106039059791338e-06, "loss": 0.954, "step": 3461 }, { "epoch": 0.7098626204633997, "grad_norm": 1.2023258209228516, "learning_rate": 4.10067478434219e-06, "loss": 0.9315, "step": 3462 }, { "epoch": 0.7100676645478778, "grad_norm": 1.2992045879364014, "learning_rate": 4.095313111327413e-06, "loss": 0.8919, "step": 3463 }, { "epoch": 0.7102727086323559, "grad_norm": 1.3954437971115112, "learning_rate": 4.089954043112263e-06, "loss": 0.931, "step": 3464 }, { "epoch": 0.7104777527168341, "grad_norm": 1.2278698682785034, "learning_rate": 4.08459758206084e-06, "loss": 0.9077, "step": 3465 }, { "epoch": 0.7106827968013123, "grad_norm": 1.2912663221359253, "learning_rate": 4.079243730536107e-06, "loss": 0.8704, "step": 3466 }, { "epoch": 0.7108878408857905, "grad_norm": 1.3771793842315674, "learning_rate": 4.073892490899865e-06, "loss": 0.946, "step": 3467 }, { "epoch": 0.7110928849702686, "grad_norm": 1.2186074256896973, "learning_rate": 4.068543865512762e-06, "loss": 0.8388, "step": 3468 }, { "epoch": 0.7112979290547468, "grad_norm": 1.296272873878479, "learning_rate": 4.063197856734295e-06, "loss": 0.826, "step": 3469 }, { "epoch": 0.711502973139225, "grad_norm": 1.152826189994812, "learning_rate": 4.057854466922814e-06, "loss": 0.8624, "step": 3470 }, { "epoch": 0.7117080172237031, "grad_norm": 1.2888320684432983, "learning_rate": 4.0525136984355e-06, "loss": 0.9578, "step": 3471 }, { "epoch": 0.7119130613081812, "grad_norm": 1.2630836963653564, "learning_rate": 4.047175553628397e-06, "loss": 0.8942, "step": 3472 }, { "epoch": 0.7121181053926594, "grad_norm": 1.1183000802993774, "learning_rate": 4.041840034856366e-06, "loss": 0.8768, "step": 3473 }, { "epoch": 0.7123231494771376, "grad_norm": 1.2604506015777588, "learning_rate": 4.036507144473132e-06, "loss": 0.8864, "step": 3474 }, { "epoch": 0.7125281935616158, "grad_norm": 1.3458237648010254, "learning_rate": 4.0311768848312485e-06, "loss": 0.9221, "step": 3475 }, { "epoch": 0.7127332376460939, "grad_norm": 1.2127734422683716, "learning_rate": 4.025849258282112e-06, "loss": 0.8634, "step": 3476 }, { "epoch": 0.712938281730572, "grad_norm": 1.251640796661377, "learning_rate": 4.020524267175954e-06, "loss": 0.8781, "step": 3477 }, { "epoch": 0.7131433258150502, "grad_norm": 1.2262251377105713, "learning_rate": 4.015201913861852e-06, "loss": 0.9349, "step": 3478 }, { "epoch": 0.7133483698995284, "grad_norm": 1.3210570812225342, "learning_rate": 4.009882200687714e-06, "loss": 0.9025, "step": 3479 }, { "epoch": 0.7135534139840065, "grad_norm": 1.2261499166488647, "learning_rate": 4.004565130000277e-06, "loss": 0.8647, "step": 3480 }, { "epoch": 0.7137584580684847, "grad_norm": 1.2194916009902954, "learning_rate": 3.999250704145131e-06, "loss": 0.7486, "step": 3481 }, { "epoch": 0.7139635021529629, "grad_norm": 1.3064197301864624, "learning_rate": 3.993938925466674e-06, "loss": 0.9293, "step": 3482 }, { "epoch": 0.7141685462374411, "grad_norm": 1.2018095254898071, "learning_rate": 3.9886297963081585e-06, "loss": 0.8649, "step": 3483 }, { "epoch": 0.7143735903219192, "grad_norm": 1.209181308746338, "learning_rate": 3.9833233190116535e-06, "loss": 0.864, "step": 3484 }, { "epoch": 0.7145786344063974, "grad_norm": 1.2435652017593384, "learning_rate": 3.978019495918073e-06, "loss": 0.9134, "step": 3485 }, { "epoch": 0.7147836784908755, "grad_norm": 1.2183603048324585, "learning_rate": 3.972718329367138e-06, "loss": 0.9121, "step": 3486 }, { "epoch": 0.7149887225753537, "grad_norm": 1.4162654876708984, "learning_rate": 3.96741982169742e-06, "loss": 0.951, "step": 3487 }, { "epoch": 0.7151937666598318, "grad_norm": 1.3128896951675415, "learning_rate": 3.962123975246303e-06, "loss": 0.8908, "step": 3488 }, { "epoch": 0.71539881074431, "grad_norm": 1.3009718656539917, "learning_rate": 3.956830792350007e-06, "loss": 0.8859, "step": 3489 }, { "epoch": 0.7156038548287882, "grad_norm": 1.1736104488372803, "learning_rate": 3.951540275343569e-06, "loss": 0.8435, "step": 3490 }, { "epoch": 0.7158088989132664, "grad_norm": 1.1685543060302734, "learning_rate": 3.946252426560855e-06, "loss": 0.8093, "step": 3491 }, { "epoch": 0.7160139429977446, "grad_norm": 1.2174651622772217, "learning_rate": 3.9409672483345465e-06, "loss": 0.8943, "step": 3492 }, { "epoch": 0.7162189870822226, "grad_norm": 1.2901520729064941, "learning_rate": 3.93568474299616e-06, "loss": 0.9184, "step": 3493 }, { "epoch": 0.7164240311667008, "grad_norm": 1.1842784881591797, "learning_rate": 3.9304049128760215e-06, "loss": 0.861, "step": 3494 }, { "epoch": 0.716629075251179, "grad_norm": 1.1940722465515137, "learning_rate": 3.925127760303281e-06, "loss": 0.8641, "step": 3495 }, { "epoch": 0.7168341193356572, "grad_norm": 1.293587565422058, "learning_rate": 3.919853287605908e-06, "loss": 0.8567, "step": 3496 }, { "epoch": 0.7170391634201353, "grad_norm": 1.2302510738372803, "learning_rate": 3.914581497110684e-06, "loss": 0.9545, "step": 3497 }, { "epoch": 0.7172442075046135, "grad_norm": 1.243178367614746, "learning_rate": 3.9093123911432194e-06, "loss": 0.8624, "step": 3498 }, { "epoch": 0.7174492515890917, "grad_norm": 1.2970467805862427, "learning_rate": 3.90404597202793e-06, "loss": 0.8697, "step": 3499 }, { "epoch": 0.7176542956735699, "grad_norm": 1.3403311967849731, "learning_rate": 3.898782242088049e-06, "loss": 0.9118, "step": 3500 }, { "epoch": 0.7178593397580479, "grad_norm": 1.234751582145691, "learning_rate": 3.893521203645618e-06, "loss": 0.7507, "step": 3501 }, { "epoch": 0.7180643838425261, "grad_norm": 1.3117358684539795, "learning_rate": 3.888262859021508e-06, "loss": 0.873, "step": 3502 }, { "epoch": 0.7182694279270043, "grad_norm": 1.3306523561477661, "learning_rate": 3.883007210535384e-06, "loss": 0.9133, "step": 3503 }, { "epoch": 0.7184744720114825, "grad_norm": 1.301024079322815, "learning_rate": 3.8777542605057295e-06, "loss": 0.8944, "step": 3504 }, { "epoch": 0.7186795160959606, "grad_norm": 1.1897265911102295, "learning_rate": 3.8725040112498315e-06, "loss": 0.9194, "step": 3505 }, { "epoch": 0.7188845601804388, "grad_norm": 1.35617995262146, "learning_rate": 3.867256465083799e-06, "loss": 0.7869, "step": 3506 }, { "epoch": 0.719089604264917, "grad_norm": 1.285191297531128, "learning_rate": 3.862011624322534e-06, "loss": 0.9169, "step": 3507 }, { "epoch": 0.7192946483493952, "grad_norm": 1.3121566772460938, "learning_rate": 3.856769491279755e-06, "loss": 0.887, "step": 3508 }, { "epoch": 0.7194996924338732, "grad_norm": 1.1995916366577148, "learning_rate": 3.851530068267975e-06, "loss": 0.8276, "step": 3509 }, { "epoch": 0.7197047365183514, "grad_norm": 1.2247748374938965, "learning_rate": 3.8462933575985265e-06, "loss": 0.8775, "step": 3510 }, { "epoch": 0.7199097806028296, "grad_norm": 1.265958547592163, "learning_rate": 3.841059361581533e-06, "loss": 0.8581, "step": 3511 }, { "epoch": 0.7201148246873078, "grad_norm": 1.2584607601165771, "learning_rate": 3.835828082525925e-06, "loss": 0.9679, "step": 3512 }, { "epoch": 0.7203198687717859, "grad_norm": 1.4689687490463257, "learning_rate": 3.830599522739437e-06, "loss": 0.8552, "step": 3513 }, { "epoch": 0.7205249128562641, "grad_norm": 1.2833386659622192, "learning_rate": 3.825373684528595e-06, "loss": 0.939, "step": 3514 }, { "epoch": 0.7207299569407423, "grad_norm": 1.3039617538452148, "learning_rate": 3.820150570198737e-06, "loss": 0.8653, "step": 3515 }, { "epoch": 0.7209350010252205, "grad_norm": 1.2383853197097778, "learning_rate": 3.814930182053988e-06, "loss": 0.904, "step": 3516 }, { "epoch": 0.7211400451096985, "grad_norm": 1.2455593347549438, "learning_rate": 3.8097125223972864e-06, "loss": 0.9066, "step": 3517 }, { "epoch": 0.7213450891941767, "grad_norm": 1.1671549081802368, "learning_rate": 3.80449759353034e-06, "loss": 0.8895, "step": 3518 }, { "epoch": 0.7215501332786549, "grad_norm": 1.2506870031356812, "learning_rate": 3.7992853977536794e-06, "loss": 0.8663, "step": 3519 }, { "epoch": 0.7217551773631331, "grad_norm": 1.2437607049942017, "learning_rate": 3.7940759373666103e-06, "loss": 0.8694, "step": 3520 }, { "epoch": 0.7219602214476112, "grad_norm": 1.1967153549194336, "learning_rate": 3.78886921466725e-06, "loss": 0.8723, "step": 3521 }, { "epoch": 0.7221652655320894, "grad_norm": 1.3070200681686401, "learning_rate": 3.7836652319524835e-06, "loss": 0.7791, "step": 3522 }, { "epoch": 0.7223703096165676, "grad_norm": 1.3202649354934692, "learning_rate": 3.778463991518012e-06, "loss": 0.9048, "step": 3523 }, { "epoch": 0.7225753537010458, "grad_norm": 1.2792402505874634, "learning_rate": 3.773265495658309e-06, "loss": 0.8794, "step": 3524 }, { "epoch": 0.7227803977855239, "grad_norm": 1.3240317106246948, "learning_rate": 3.7680697466666515e-06, "loss": 0.9431, "step": 3525 }, { "epoch": 0.722985441870002, "grad_norm": 1.172577142715454, "learning_rate": 3.7628767468350946e-06, "loss": 0.8511, "step": 3526 }, { "epoch": 0.7231904859544802, "grad_norm": 1.1758238077163696, "learning_rate": 3.7576864984544814e-06, "loss": 0.8651, "step": 3527 }, { "epoch": 0.7233955300389584, "grad_norm": 1.3402193784713745, "learning_rate": 3.752499003814445e-06, "loss": 0.9166, "step": 3528 }, { "epoch": 0.7236005741234366, "grad_norm": 1.2394540309906006, "learning_rate": 3.7473142652033977e-06, "loss": 0.8659, "step": 3529 }, { "epoch": 0.7238056182079147, "grad_norm": 1.2901753187179565, "learning_rate": 3.7421322849085516e-06, "loss": 0.9025, "step": 3530 }, { "epoch": 0.7240106622923929, "grad_norm": 1.2148327827453613, "learning_rate": 3.736953065215877e-06, "loss": 0.907, "step": 3531 }, { "epoch": 0.724215706376871, "grad_norm": 1.2954331636428833, "learning_rate": 3.73177660841015e-06, "loss": 0.9354, "step": 3532 }, { "epoch": 0.7244207504613492, "grad_norm": 1.232756495475769, "learning_rate": 3.726602916774912e-06, "loss": 0.8543, "step": 3533 }, { "epoch": 0.7246257945458273, "grad_norm": 1.1358243227005005, "learning_rate": 3.721431992592497e-06, "loss": 0.8564, "step": 3534 }, { "epoch": 0.7248308386303055, "grad_norm": 1.3309956789016724, "learning_rate": 3.7162638381440077e-06, "loss": 0.8519, "step": 3535 }, { "epoch": 0.7250358827147837, "grad_norm": 1.2038480043411255, "learning_rate": 3.71109845570933e-06, "loss": 0.8177, "step": 3536 }, { "epoch": 0.7252409267992619, "grad_norm": 1.192345142364502, "learning_rate": 3.7059358475671225e-06, "loss": 0.8946, "step": 3537 }, { "epoch": 0.72544597088374, "grad_norm": 1.2293821573257446, "learning_rate": 3.70077601599483e-06, "loss": 0.8973, "step": 3538 }, { "epoch": 0.7256510149682182, "grad_norm": 1.254163146018982, "learning_rate": 3.6956189632686624e-06, "loss": 0.8746, "step": 3539 }, { "epoch": 0.7258560590526963, "grad_norm": 1.231833577156067, "learning_rate": 3.690464691663608e-06, "loss": 0.8447, "step": 3540 }, { "epoch": 0.7260611031371745, "grad_norm": 1.4537068605422974, "learning_rate": 3.685313203453422e-06, "loss": 0.8796, "step": 3541 }, { "epoch": 0.7262661472216526, "grad_norm": 1.2777024507522583, "learning_rate": 3.680164500910646e-06, "loss": 0.8741, "step": 3542 }, { "epoch": 0.7264711913061308, "grad_norm": 1.4065237045288086, "learning_rate": 3.67501858630658e-06, "loss": 0.9196, "step": 3543 }, { "epoch": 0.726676235390609, "grad_norm": 1.2613409757614136, "learning_rate": 3.6698754619112974e-06, "loss": 0.8168, "step": 3544 }, { "epoch": 0.7268812794750872, "grad_norm": 1.2414613962173462, "learning_rate": 3.664735129993643e-06, "loss": 0.8651, "step": 3545 }, { "epoch": 0.7270863235595653, "grad_norm": 1.27672278881073, "learning_rate": 3.6595975928212237e-06, "loss": 0.8624, "step": 3546 }, { "epoch": 0.7272913676440435, "grad_norm": 1.2786495685577393, "learning_rate": 3.654462852660423e-06, "loss": 0.8959, "step": 3547 }, { "epoch": 0.7274964117285216, "grad_norm": 1.3295905590057373, "learning_rate": 3.6493309117763853e-06, "loss": 0.8173, "step": 3548 }, { "epoch": 0.7277014558129998, "grad_norm": 1.2798160314559937, "learning_rate": 3.644201772433018e-06, "loss": 0.9031, "step": 3549 }, { "epoch": 0.7279064998974779, "grad_norm": 1.2076165676116943, "learning_rate": 3.6390754368929924e-06, "loss": 0.8967, "step": 3550 }, { "epoch": 0.7281115439819561, "grad_norm": 1.1776201725006104, "learning_rate": 3.633951907417751e-06, "loss": 0.8671, "step": 3551 }, { "epoch": 0.7283165880664343, "grad_norm": 1.274376392364502, "learning_rate": 3.6288311862674885e-06, "loss": 0.9034, "step": 3552 }, { "epoch": 0.7285216321509125, "grad_norm": 1.2323769330978394, "learning_rate": 3.6237132757011727e-06, "loss": 0.8927, "step": 3553 }, { "epoch": 0.7287266762353906, "grad_norm": 1.2750928401947021, "learning_rate": 3.6185981779765113e-06, "loss": 0.907, "step": 3554 }, { "epoch": 0.7289317203198687, "grad_norm": 1.2161856889724731, "learning_rate": 3.613485895349994e-06, "loss": 0.773, "step": 3555 }, { "epoch": 0.7291367644043469, "grad_norm": 1.465704321861267, "learning_rate": 3.6083764300768554e-06, "loss": 0.8908, "step": 3556 }, { "epoch": 0.7293418084888251, "grad_norm": 1.2813178300857544, "learning_rate": 3.6032697844110896e-06, "loss": 0.8725, "step": 3557 }, { "epoch": 0.7295468525733032, "grad_norm": 1.4017391204833984, "learning_rate": 3.5981659606054455e-06, "loss": 0.8777, "step": 3558 }, { "epoch": 0.7297518966577814, "grad_norm": 1.2948545217514038, "learning_rate": 3.593064960911433e-06, "loss": 0.9084, "step": 3559 }, { "epoch": 0.7299569407422596, "grad_norm": 1.2034398317337036, "learning_rate": 3.587966787579312e-06, "loss": 0.908, "step": 3560 }, { "epoch": 0.7301619848267378, "grad_norm": 1.252606749534607, "learning_rate": 3.58287144285809e-06, "loss": 0.8882, "step": 3561 }, { "epoch": 0.730367028911216, "grad_norm": 1.267235517501831, "learning_rate": 3.5777789289955454e-06, "loss": 0.9117, "step": 3562 }, { "epoch": 0.730572072995694, "grad_norm": 1.2712137699127197, "learning_rate": 3.5726892482381802e-06, "loss": 0.7966, "step": 3563 }, { "epoch": 0.7307771170801722, "grad_norm": 1.1502827405929565, "learning_rate": 3.5676024028312705e-06, "loss": 0.9057, "step": 3564 }, { "epoch": 0.7309821611646504, "grad_norm": 1.2238494157791138, "learning_rate": 3.5625183950188267e-06, "loss": 0.8972, "step": 3565 }, { "epoch": 0.7311872052491286, "grad_norm": 1.1801371574401855, "learning_rate": 3.557437227043624e-06, "loss": 0.9115, "step": 3566 }, { "epoch": 0.7313922493336067, "grad_norm": 1.3191922903060913, "learning_rate": 3.5523589011471592e-06, "loss": 0.9123, "step": 3567 }, { "epoch": 0.7315972934180849, "grad_norm": 1.2134150266647339, "learning_rate": 3.5472834195697017e-06, "loss": 0.9163, "step": 3568 }, { "epoch": 0.7318023375025631, "grad_norm": 1.2554229497909546, "learning_rate": 3.542210784550246e-06, "loss": 0.881, "step": 3569 }, { "epoch": 0.7320073815870413, "grad_norm": 1.2520045042037964, "learning_rate": 3.537140998326548e-06, "loss": 0.9374, "step": 3570 }, { "epoch": 0.7322124256715193, "grad_norm": 1.2639458179473877, "learning_rate": 3.5320740631350946e-06, "loss": 0.9193, "step": 3571 }, { "epoch": 0.7324174697559975, "grad_norm": 1.1255440711975098, "learning_rate": 3.527009981211119e-06, "loss": 0.8691, "step": 3572 }, { "epoch": 0.7326225138404757, "grad_norm": 1.2593904733657837, "learning_rate": 3.521948754788591e-06, "loss": 0.8391, "step": 3573 }, { "epoch": 0.7328275579249539, "grad_norm": 1.165908694267273, "learning_rate": 3.5168903861002334e-06, "loss": 0.8702, "step": 3574 }, { "epoch": 0.733032602009432, "grad_norm": 1.2110530138015747, "learning_rate": 3.5118348773774957e-06, "loss": 0.8961, "step": 3575 }, { "epoch": 0.7332376460939102, "grad_norm": 1.2975271940231323, "learning_rate": 3.5067822308505707e-06, "loss": 0.8797, "step": 3576 }, { "epoch": 0.7334426901783884, "grad_norm": 1.2305349111557007, "learning_rate": 3.5017324487483873e-06, "loss": 0.911, "step": 3577 }, { "epoch": 0.7336477342628666, "grad_norm": 1.2829838991165161, "learning_rate": 3.4966855332986094e-06, "loss": 0.8955, "step": 3578 }, { "epoch": 0.7338527783473446, "grad_norm": 1.2565089464187622, "learning_rate": 3.491641486727645e-06, "loss": 0.8456, "step": 3579 }, { "epoch": 0.7340578224318228, "grad_norm": 1.1677346229553223, "learning_rate": 3.486600311260625e-06, "loss": 0.8909, "step": 3580 }, { "epoch": 0.734262866516301, "grad_norm": 1.2489033937454224, "learning_rate": 3.481562009121421e-06, "loss": 0.8506, "step": 3581 }, { "epoch": 0.7344679106007792, "grad_norm": 1.2789684534072876, "learning_rate": 3.47652658253263e-06, "loss": 0.863, "step": 3582 }, { "epoch": 0.7346729546852573, "grad_norm": 1.245428204536438, "learning_rate": 3.471494033715592e-06, "loss": 0.9121, "step": 3583 }, { "epoch": 0.7348779987697355, "grad_norm": 1.286333441734314, "learning_rate": 3.4664643648903685e-06, "loss": 0.8882, "step": 3584 }, { "epoch": 0.7350830428542137, "grad_norm": 1.1932213306427002, "learning_rate": 3.4614375782757513e-06, "loss": 0.821, "step": 3585 }, { "epoch": 0.7352880869386919, "grad_norm": 1.2306551933288574, "learning_rate": 3.4564136760892596e-06, "loss": 0.9338, "step": 3586 }, { "epoch": 0.7354931310231699, "grad_norm": 1.3234978914260864, "learning_rate": 3.4513926605471504e-06, "loss": 0.921, "step": 3587 }, { "epoch": 0.7356981751076481, "grad_norm": 1.2016942501068115, "learning_rate": 3.4463745338643937e-06, "loss": 0.8431, "step": 3588 }, { "epoch": 0.7359032191921263, "grad_norm": 1.143277645111084, "learning_rate": 3.441359298254694e-06, "loss": 0.8377, "step": 3589 }, { "epoch": 0.7361082632766045, "grad_norm": 1.3531627655029297, "learning_rate": 3.436346955930472e-06, "loss": 0.873, "step": 3590 }, { "epoch": 0.7363133073610826, "grad_norm": 1.2995506525039673, "learning_rate": 3.4313375091028845e-06, "loss": 0.8707, "step": 3591 }, { "epoch": 0.7365183514455608, "grad_norm": 1.222800850868225, "learning_rate": 3.4263309599818017e-06, "loss": 0.8251, "step": 3592 }, { "epoch": 0.736723395530039, "grad_norm": 1.3812980651855469, "learning_rate": 3.4213273107758173e-06, "loss": 0.8467, "step": 3593 }, { "epoch": 0.7369284396145172, "grad_norm": 1.2539414167404175, "learning_rate": 3.416326563692246e-06, "loss": 0.848, "step": 3594 }, { "epoch": 0.7371334836989952, "grad_norm": 1.3148832321166992, "learning_rate": 3.4113287209371194e-06, "loss": 0.8593, "step": 3595 }, { "epoch": 0.7373385277834734, "grad_norm": 1.2278873920440674, "learning_rate": 3.406333784715198e-06, "loss": 0.8508, "step": 3596 }, { "epoch": 0.7375435718679516, "grad_norm": 1.302789330482483, "learning_rate": 3.4013417572299446e-06, "loss": 0.8701, "step": 3597 }, { "epoch": 0.7377486159524298, "grad_norm": 1.2178963422775269, "learning_rate": 3.39635264068356e-06, "loss": 0.9649, "step": 3598 }, { "epoch": 0.737953660036908, "grad_norm": 1.2727398872375488, "learning_rate": 3.3913664372769327e-06, "loss": 0.8418, "step": 3599 }, { "epoch": 0.7381587041213861, "grad_norm": 1.3452929258346558, "learning_rate": 3.3863831492096932e-06, "loss": 0.8401, "step": 3600 }, { "epoch": 0.7383637482058643, "grad_norm": 1.2608568668365479, "learning_rate": 3.3814027786801675e-06, "loss": 0.8749, "step": 3601 }, { "epoch": 0.7385687922903424, "grad_norm": 1.3110871315002441, "learning_rate": 3.37642532788541e-06, "loss": 0.9052, "step": 3602 }, { "epoch": 0.7387738363748206, "grad_norm": 1.2287474870681763, "learning_rate": 3.3714507990211654e-06, "loss": 0.9348, "step": 3603 }, { "epoch": 0.7389788804592987, "grad_norm": 1.2787009477615356, "learning_rate": 3.366479194281913e-06, "loss": 0.8916, "step": 3604 }, { "epoch": 0.7391839245437769, "grad_norm": 1.1640383005142212, "learning_rate": 3.3615105158608286e-06, "loss": 0.8526, "step": 3605 }, { "epoch": 0.7393889686282551, "grad_norm": 1.2904664278030396, "learning_rate": 3.3565447659497963e-06, "loss": 0.9079, "step": 3606 }, { "epoch": 0.7395940127127333, "grad_norm": 1.3404102325439453, "learning_rate": 3.3515819467394184e-06, "loss": 0.9487, "step": 3607 }, { "epoch": 0.7397990567972114, "grad_norm": 1.3991615772247314, "learning_rate": 3.3466220604189957e-06, "loss": 0.9017, "step": 3608 }, { "epoch": 0.7400041008816896, "grad_norm": 1.2139461040496826, "learning_rate": 3.3416651091765374e-06, "loss": 0.9005, "step": 3609 }, { "epoch": 0.7402091449661677, "grad_norm": 1.2986876964569092, "learning_rate": 3.3367110951987536e-06, "loss": 0.9265, "step": 3610 }, { "epoch": 0.7404141890506459, "grad_norm": 1.1934845447540283, "learning_rate": 3.331760020671071e-06, "loss": 0.8474, "step": 3611 }, { "epoch": 0.740619233135124, "grad_norm": 1.3977723121643066, "learning_rate": 3.326811887777607e-06, "loss": 0.9049, "step": 3612 }, { "epoch": 0.7408242772196022, "grad_norm": 1.2885258197784424, "learning_rate": 3.321866698701187e-06, "loss": 0.9278, "step": 3613 }, { "epoch": 0.7410293213040804, "grad_norm": 1.2605984210968018, "learning_rate": 3.316924455623334e-06, "loss": 0.8333, "step": 3614 }, { "epoch": 0.7412343653885586, "grad_norm": 1.1880886554718018, "learning_rate": 3.3119851607242803e-06, "loss": 0.877, "step": 3615 }, { "epoch": 0.7414394094730367, "grad_norm": 1.1779131889343262, "learning_rate": 3.307048816182947e-06, "loss": 0.9132, "step": 3616 }, { "epoch": 0.7416444535575148, "grad_norm": 1.2833014726638794, "learning_rate": 3.3021154241769606e-06, "loss": 0.8457, "step": 3617 }, { "epoch": 0.741849497641993, "grad_norm": 1.2426438331604004, "learning_rate": 3.2971849868826366e-06, "loss": 0.8814, "step": 3618 }, { "epoch": 0.7420545417264712, "grad_norm": 1.2428759336471558, "learning_rate": 3.2922575064750017e-06, "loss": 0.9035, "step": 3619 }, { "epoch": 0.7422595858109493, "grad_norm": 1.2392921447753906, "learning_rate": 3.2873329851277656e-06, "loss": 0.9138, "step": 3620 }, { "epoch": 0.7424646298954275, "grad_norm": 1.2803022861480713, "learning_rate": 3.2824114250133378e-06, "loss": 0.863, "step": 3621 }, { "epoch": 0.7426696739799057, "grad_norm": 1.3227683305740356, "learning_rate": 3.2774928283028153e-06, "loss": 0.9433, "step": 3622 }, { "epoch": 0.7428747180643839, "grad_norm": 1.3282201290130615, "learning_rate": 3.2725771971660002e-06, "loss": 0.9479, "step": 3623 }, { "epoch": 0.743079762148862, "grad_norm": 1.2992340326309204, "learning_rate": 3.2676645337713773e-06, "loss": 0.8355, "step": 3624 }, { "epoch": 0.7432848062333401, "grad_norm": 1.1733394861221313, "learning_rate": 3.262754840286122e-06, "loss": 0.8832, "step": 3625 }, { "epoch": 0.7434898503178183, "grad_norm": 1.2428559064865112, "learning_rate": 3.2578481188761012e-06, "loss": 0.9163, "step": 3626 }, { "epoch": 0.7436948944022965, "grad_norm": 1.3575572967529297, "learning_rate": 3.2529443717058693e-06, "loss": 0.8566, "step": 3627 }, { "epoch": 0.7438999384867746, "grad_norm": 1.2831145524978638, "learning_rate": 3.2480436009386775e-06, "loss": 0.9417, "step": 3628 }, { "epoch": 0.7441049825712528, "grad_norm": 1.288586974143982, "learning_rate": 3.2431458087364478e-06, "loss": 0.8874, "step": 3629 }, { "epoch": 0.744310026655731, "grad_norm": 1.2369928359985352, "learning_rate": 3.2382509972598087e-06, "loss": 0.8686, "step": 3630 }, { "epoch": 0.7445150707402092, "grad_norm": 1.1999479532241821, "learning_rate": 3.233359168668049e-06, "loss": 0.8789, "step": 3631 }, { "epoch": 0.7447201148246874, "grad_norm": 1.2351435422897339, "learning_rate": 3.228470325119164e-06, "loss": 0.9173, "step": 3632 }, { "epoch": 0.7449251589091654, "grad_norm": 1.1655393838882446, "learning_rate": 3.223584468769818e-06, "loss": 0.939, "step": 3633 }, { "epoch": 0.7451302029936436, "grad_norm": 1.2832523584365845, "learning_rate": 3.2187016017753714e-06, "loss": 0.9184, "step": 3634 }, { "epoch": 0.7453352470781218, "grad_norm": 1.2575249671936035, "learning_rate": 3.2138217262898452e-06, "loss": 0.8758, "step": 3635 }, { "epoch": 0.7455402911626, "grad_norm": 1.2773175239562988, "learning_rate": 3.208944844465961e-06, "loss": 0.8083, "step": 3636 }, { "epoch": 0.7457453352470781, "grad_norm": 1.2942836284637451, "learning_rate": 3.20407095845511e-06, "loss": 0.8828, "step": 3637 }, { "epoch": 0.7459503793315563, "grad_norm": 1.333613634109497, "learning_rate": 3.199200070407358e-06, "loss": 0.9601, "step": 3638 }, { "epoch": 0.7461554234160345, "grad_norm": 1.3406885862350464, "learning_rate": 3.194332182471461e-06, "loss": 0.8523, "step": 3639 }, { "epoch": 0.7463604675005127, "grad_norm": 1.312886118888855, "learning_rate": 3.1894672967948414e-06, "loss": 0.9766, "step": 3640 }, { "epoch": 0.7465655115849907, "grad_norm": 1.2981455326080322, "learning_rate": 3.1846054155235996e-06, "loss": 0.997, "step": 3641 }, { "epoch": 0.7467705556694689, "grad_norm": 1.2104829549789429, "learning_rate": 3.179746540802506e-06, "loss": 0.876, "step": 3642 }, { "epoch": 0.7469755997539471, "grad_norm": 1.330299973487854, "learning_rate": 3.1748906747750217e-06, "loss": 0.8235, "step": 3643 }, { "epoch": 0.7471806438384253, "grad_norm": 1.2355948686599731, "learning_rate": 3.170037819583255e-06, "loss": 0.8633, "step": 3644 }, { "epoch": 0.7473856879229034, "grad_norm": 1.2330174446105957, "learning_rate": 3.165187977368007e-06, "loss": 0.8782, "step": 3645 }, { "epoch": 0.7475907320073816, "grad_norm": 1.2587777376174927, "learning_rate": 3.1603411502687378e-06, "loss": 0.8445, "step": 3646 }, { "epoch": 0.7477957760918598, "grad_norm": 1.191264271736145, "learning_rate": 3.155497340423588e-06, "loss": 0.9096, "step": 3647 }, { "epoch": 0.748000820176338, "grad_norm": 1.2640119791030884, "learning_rate": 3.1506565499693564e-06, "loss": 0.946, "step": 3648 }, { "epoch": 0.748205864260816, "grad_norm": 1.1874357461929321, "learning_rate": 3.1458187810415155e-06, "loss": 0.8575, "step": 3649 }, { "epoch": 0.7484109083452942, "grad_norm": 1.300716519355774, "learning_rate": 3.1409840357741994e-06, "loss": 0.8962, "step": 3650 }, { "epoch": 0.7486159524297724, "grad_norm": 1.2728251218795776, "learning_rate": 3.1361523163002194e-06, "loss": 0.9333, "step": 3651 }, { "epoch": 0.7488209965142506, "grad_norm": 1.3473503589630127, "learning_rate": 3.1313236247510414e-06, "loss": 0.89, "step": 3652 }, { "epoch": 0.7490260405987287, "grad_norm": 1.1587179899215698, "learning_rate": 3.1264979632568017e-06, "loss": 0.9371, "step": 3653 }, { "epoch": 0.7492310846832069, "grad_norm": 1.1726202964782715, "learning_rate": 3.121675333946291e-06, "loss": 0.834, "step": 3654 }, { "epoch": 0.7494361287676851, "grad_norm": 1.3115943670272827, "learning_rate": 3.1168557389469777e-06, "loss": 0.8907, "step": 3655 }, { "epoch": 0.7496411728521633, "grad_norm": 1.1842600107192993, "learning_rate": 3.1120391803849793e-06, "loss": 0.978, "step": 3656 }, { "epoch": 0.7498462169366413, "grad_norm": 1.3237515687942505, "learning_rate": 3.107225660385077e-06, "loss": 0.8095, "step": 3657 }, { "epoch": 0.7500512610211195, "grad_norm": 1.275201439857483, "learning_rate": 3.1024151810707116e-06, "loss": 0.9302, "step": 3658 }, { "epoch": 0.7502563051055977, "grad_norm": 1.2076011896133423, "learning_rate": 3.0976077445639794e-06, "loss": 0.9252, "step": 3659 }, { "epoch": 0.7504613491900759, "grad_norm": 1.298534631729126, "learning_rate": 3.0928033529856462e-06, "loss": 0.945, "step": 3660 }, { "epoch": 0.750666393274554, "grad_norm": 1.2055493593215942, "learning_rate": 3.088002008455121e-06, "loss": 0.8321, "step": 3661 }, { "epoch": 0.7508714373590322, "grad_norm": 1.214043140411377, "learning_rate": 3.0832037130904748e-06, "loss": 0.8453, "step": 3662 }, { "epoch": 0.7510764814435104, "grad_norm": 1.3167043924331665, "learning_rate": 3.0784084690084294e-06, "loss": 0.8949, "step": 3663 }, { "epoch": 0.7512815255279885, "grad_norm": 1.218463659286499, "learning_rate": 3.0736162783243696e-06, "loss": 0.8753, "step": 3664 }, { "epoch": 0.7514865696124666, "grad_norm": 1.1558942794799805, "learning_rate": 3.0688271431523207e-06, "loss": 0.9012, "step": 3665 }, { "epoch": 0.7516916136969448, "grad_norm": 1.2514513731002808, "learning_rate": 3.064041065604978e-06, "loss": 0.8811, "step": 3666 }, { "epoch": 0.751896657781423, "grad_norm": 1.1659506559371948, "learning_rate": 3.0592580477936606e-06, "loss": 0.9056, "step": 3667 }, { "epoch": 0.7521017018659012, "grad_norm": 1.1658521890640259, "learning_rate": 3.054478091828367e-06, "loss": 0.956, "step": 3668 }, { "epoch": 0.7523067459503794, "grad_norm": 1.2583144903182983, "learning_rate": 3.0497011998177273e-06, "loss": 0.9478, "step": 3669 }, { "epoch": 0.7525117900348575, "grad_norm": 1.2808843851089478, "learning_rate": 3.044927373869023e-06, "loss": 0.9182, "step": 3670 }, { "epoch": 0.7527168341193357, "grad_norm": 1.3167531490325928, "learning_rate": 3.0401566160881845e-06, "loss": 0.8283, "step": 3671 }, { "epoch": 0.7529218782038138, "grad_norm": 1.3442896604537964, "learning_rate": 3.035388928579792e-06, "loss": 0.9439, "step": 3672 }, { "epoch": 0.753126922288292, "grad_norm": 1.2466859817504883, "learning_rate": 3.0306243134470668e-06, "loss": 0.8806, "step": 3673 }, { "epoch": 0.7533319663727701, "grad_norm": 1.3358670473098755, "learning_rate": 3.0258627727918722e-06, "loss": 0.8803, "step": 3674 }, { "epoch": 0.7535370104572483, "grad_norm": 1.3041725158691406, "learning_rate": 3.0211043087147295e-06, "loss": 0.947, "step": 3675 }, { "epoch": 0.7537420545417265, "grad_norm": 1.2663029432296753, "learning_rate": 3.016348923314779e-06, "loss": 0.8612, "step": 3676 }, { "epoch": 0.7539470986262047, "grad_norm": 1.2847651243209839, "learning_rate": 3.011596618689825e-06, "loss": 0.9581, "step": 3677 }, { "epoch": 0.7541521427106828, "grad_norm": 1.4774824380874634, "learning_rate": 3.0068473969362998e-06, "loss": 0.9265, "step": 3678 }, { "epoch": 0.754357186795161, "grad_norm": 1.2149873971939087, "learning_rate": 3.002101260149287e-06, "loss": 0.8351, "step": 3679 }, { "epoch": 0.7545622308796391, "grad_norm": 1.3393425941467285, "learning_rate": 2.9973582104224917e-06, "loss": 0.8959, "step": 3680 }, { "epoch": 0.7547672749641173, "grad_norm": 1.1853774785995483, "learning_rate": 2.992618249848276e-06, "loss": 0.7819, "step": 3681 }, { "epoch": 0.7549723190485954, "grad_norm": 1.2175078392028809, "learning_rate": 2.9878813805176252e-06, "loss": 0.9255, "step": 3682 }, { "epoch": 0.7551773631330736, "grad_norm": 1.180022120475769, "learning_rate": 2.9831476045201724e-06, "loss": 0.8903, "step": 3683 }, { "epoch": 0.7553824072175518, "grad_norm": 1.2780907154083252, "learning_rate": 2.9784169239441785e-06, "loss": 0.8322, "step": 3684 }, { "epoch": 0.75558745130203, "grad_norm": 1.2086018323898315, "learning_rate": 2.973689340876539e-06, "loss": 0.9143, "step": 3685 }, { "epoch": 0.7557924953865081, "grad_norm": 1.1361850500106812, "learning_rate": 2.968964857402785e-06, "loss": 0.8693, "step": 3686 }, { "epoch": 0.7559975394709862, "grad_norm": 1.2630987167358398, "learning_rate": 2.9642434756070793e-06, "loss": 0.9418, "step": 3687 }, { "epoch": 0.7562025835554644, "grad_norm": 1.1951717138290405, "learning_rate": 2.9595251975722208e-06, "loss": 0.855, "step": 3688 }, { "epoch": 0.7564076276399426, "grad_norm": 1.201545238494873, "learning_rate": 2.954810025379633e-06, "loss": 0.9073, "step": 3689 }, { "epoch": 0.7566126717244207, "grad_norm": 1.293687343597412, "learning_rate": 2.9500979611093715e-06, "loss": 0.9038, "step": 3690 }, { "epoch": 0.7568177158088989, "grad_norm": 1.3201802968978882, "learning_rate": 2.9453890068401194e-06, "loss": 0.8969, "step": 3691 }, { "epoch": 0.7570227598933771, "grad_norm": 1.2827956676483154, "learning_rate": 2.940683164649194e-06, "loss": 0.8873, "step": 3692 }, { "epoch": 0.7572278039778553, "grad_norm": 1.2757853269577026, "learning_rate": 2.9359804366125345e-06, "loss": 0.9428, "step": 3693 }, { "epoch": 0.7574328480623334, "grad_norm": 1.306330919265747, "learning_rate": 2.9312808248047043e-06, "loss": 0.9575, "step": 3694 }, { "epoch": 0.7576378921468115, "grad_norm": 1.2059718370437622, "learning_rate": 2.926584331298894e-06, "loss": 0.9695, "step": 3695 }, { "epoch": 0.7578429362312897, "grad_norm": 1.4443057775497437, "learning_rate": 2.921890958166925e-06, "loss": 0.8945, "step": 3696 }, { "epoch": 0.7580479803157679, "grad_norm": 1.271909236907959, "learning_rate": 2.9172007074792342e-06, "loss": 0.987, "step": 3697 }, { "epoch": 0.758253024400246, "grad_norm": 1.2669578790664673, "learning_rate": 2.9125135813048833e-06, "loss": 0.8502, "step": 3698 }, { "epoch": 0.7584580684847242, "grad_norm": 1.3572906255722046, "learning_rate": 2.907829581711552e-06, "loss": 0.9242, "step": 3699 }, { "epoch": 0.7586631125692024, "grad_norm": 1.3705259561538696, "learning_rate": 2.903148710765552e-06, "loss": 0.8668, "step": 3700 }, { "epoch": 0.7588681566536806, "grad_norm": 1.2053372859954834, "learning_rate": 2.8984709705318048e-06, "loss": 0.8752, "step": 3701 }, { "epoch": 0.7590732007381586, "grad_norm": 1.3564186096191406, "learning_rate": 2.8937963630738517e-06, "loss": 0.9785, "step": 3702 }, { "epoch": 0.7592782448226368, "grad_norm": 1.1516188383102417, "learning_rate": 2.8891248904538516e-06, "loss": 0.7777, "step": 3703 }, { "epoch": 0.759483288907115, "grad_norm": 1.281677484512329, "learning_rate": 2.8844565547325897e-06, "loss": 0.7898, "step": 3704 }, { "epoch": 0.7596883329915932, "grad_norm": 1.2471851110458374, "learning_rate": 2.879791357969457e-06, "loss": 0.7948, "step": 3705 }, { "epoch": 0.7598933770760714, "grad_norm": 1.2557718753814697, "learning_rate": 2.8751293022224626e-06, "loss": 0.8839, "step": 3706 }, { "epoch": 0.7600984211605495, "grad_norm": 1.2424200773239136, "learning_rate": 2.87047038954823e-06, "loss": 0.849, "step": 3707 }, { "epoch": 0.7603034652450277, "grad_norm": 1.3109592199325562, "learning_rate": 2.8658146220019957e-06, "loss": 0.9341, "step": 3708 }, { "epoch": 0.7605085093295059, "grad_norm": 1.3095552921295166, "learning_rate": 2.861162001637613e-06, "loss": 0.8865, "step": 3709 }, { "epoch": 0.7607135534139841, "grad_norm": 1.1570295095443726, "learning_rate": 2.8565125305075405e-06, "loss": 0.8465, "step": 3710 }, { "epoch": 0.7609185974984621, "grad_norm": 1.3203516006469727, "learning_rate": 2.851866210662858e-06, "loss": 0.9414, "step": 3711 }, { "epoch": 0.7611236415829403, "grad_norm": 1.2337331771850586, "learning_rate": 2.8472230441532365e-06, "loss": 0.8074, "step": 3712 }, { "epoch": 0.7613286856674185, "grad_norm": 1.271085500717163, "learning_rate": 2.8425830330269753e-06, "loss": 0.8281, "step": 3713 }, { "epoch": 0.7615337297518967, "grad_norm": 1.38454270362854, "learning_rate": 2.837946179330969e-06, "loss": 0.9685, "step": 3714 }, { "epoch": 0.7617387738363748, "grad_norm": 1.1753710508346558, "learning_rate": 2.8333124851107317e-06, "loss": 0.8203, "step": 3715 }, { "epoch": 0.761943817920853, "grad_norm": 1.3642293214797974, "learning_rate": 2.8286819524103657e-06, "loss": 0.9428, "step": 3716 }, { "epoch": 0.7621488620053312, "grad_norm": 1.1327484846115112, "learning_rate": 2.8240545832725963e-06, "loss": 0.8682, "step": 3717 }, { "epoch": 0.7623539060898094, "grad_norm": 1.2964184284210205, "learning_rate": 2.819430379738742e-06, "loss": 0.8796, "step": 3718 }, { "epoch": 0.7625589501742874, "grad_norm": 1.2676947116851807, "learning_rate": 2.814809343848729e-06, "loss": 0.8594, "step": 3719 }, { "epoch": 0.7627639942587656, "grad_norm": 1.1846129894256592, "learning_rate": 2.810191477641089e-06, "loss": 0.9731, "step": 3720 }, { "epoch": 0.7629690383432438, "grad_norm": 1.236270546913147, "learning_rate": 2.80557678315295e-06, "loss": 0.8706, "step": 3721 }, { "epoch": 0.763174082427722, "grad_norm": 1.285380244255066, "learning_rate": 2.8009652624200436e-06, "loss": 0.9284, "step": 3722 }, { "epoch": 0.7633791265122001, "grad_norm": 1.2170886993408203, "learning_rate": 2.796356917476697e-06, "loss": 0.8779, "step": 3723 }, { "epoch": 0.7635841705966783, "grad_norm": 1.2587757110595703, "learning_rate": 2.79175175035585e-06, "loss": 0.9024, "step": 3724 }, { "epoch": 0.7637892146811565, "grad_norm": 1.2926385402679443, "learning_rate": 2.787149763089018e-06, "loss": 0.9056, "step": 3725 }, { "epoch": 0.7639942587656346, "grad_norm": 1.2478129863739014, "learning_rate": 2.782550957706335e-06, "loss": 0.8708, "step": 3726 }, { "epoch": 0.7641993028501127, "grad_norm": 1.3802889585494995, "learning_rate": 2.7779553362365184e-06, "loss": 0.9326, "step": 3727 }, { "epoch": 0.7644043469345909, "grad_norm": 1.291272521018982, "learning_rate": 2.7733629007068897e-06, "loss": 0.8957, "step": 3728 }, { "epoch": 0.7646093910190691, "grad_norm": 1.215036153793335, "learning_rate": 2.7687736531433596e-06, "loss": 0.9321, "step": 3729 }, { "epoch": 0.7648144351035473, "grad_norm": 1.2831475734710693, "learning_rate": 2.764187595570431e-06, "loss": 0.8586, "step": 3730 }, { "epoch": 0.7650194791880254, "grad_norm": 1.218544840812683, "learning_rate": 2.7596047300112005e-06, "loss": 0.8933, "step": 3731 }, { "epoch": 0.7652245232725036, "grad_norm": 1.2584975957870483, "learning_rate": 2.755025058487364e-06, "loss": 0.8887, "step": 3732 }, { "epoch": 0.7654295673569818, "grad_norm": 1.1960026025772095, "learning_rate": 2.7504485830191985e-06, "loss": 0.8841, "step": 3733 }, { "epoch": 0.76563461144146, "grad_norm": 1.2513294219970703, "learning_rate": 2.745875305625577e-06, "loss": 0.9962, "step": 3734 }, { "epoch": 0.765839655525938, "grad_norm": 1.3313407897949219, "learning_rate": 2.741305228323958e-06, "loss": 0.8636, "step": 3735 }, { "epoch": 0.7660446996104162, "grad_norm": 1.1745998859405518, "learning_rate": 2.736738353130388e-06, "loss": 0.8559, "step": 3736 }, { "epoch": 0.7662497436948944, "grad_norm": 1.3080110549926758, "learning_rate": 2.7321746820595084e-06, "loss": 0.8913, "step": 3737 }, { "epoch": 0.7664547877793726, "grad_norm": 1.3320645093917847, "learning_rate": 2.727614217124539e-06, "loss": 0.9073, "step": 3738 }, { "epoch": 0.7666598318638508, "grad_norm": 1.304726481437683, "learning_rate": 2.7230569603372893e-06, "loss": 0.9126, "step": 3739 }, { "epoch": 0.7668648759483289, "grad_norm": 1.2378212213516235, "learning_rate": 2.7185029137081464e-06, "loss": 0.8203, "step": 3740 }, { "epoch": 0.767069920032807, "grad_norm": 1.453420877456665, "learning_rate": 2.713952079246096e-06, "loss": 0.9148, "step": 3741 }, { "epoch": 0.7672749641172852, "grad_norm": 1.228344202041626, "learning_rate": 2.709404458958693e-06, "loss": 0.7915, "step": 3742 }, { "epoch": 0.7674800082017634, "grad_norm": 1.2333197593688965, "learning_rate": 2.70486005485208e-06, "loss": 0.8961, "step": 3743 }, { "epoch": 0.7676850522862415, "grad_norm": 1.2455447912216187, "learning_rate": 2.700318868930977e-06, "loss": 0.97, "step": 3744 }, { "epoch": 0.7678900963707197, "grad_norm": 1.2213443517684937, "learning_rate": 2.6957809031986927e-06, "loss": 0.8879, "step": 3745 }, { "epoch": 0.7680951404551979, "grad_norm": 1.3107354640960693, "learning_rate": 2.691246159657105e-06, "loss": 0.8374, "step": 3746 }, { "epoch": 0.7683001845396761, "grad_norm": 1.3563289642333984, "learning_rate": 2.6867146403066833e-06, "loss": 0.8442, "step": 3747 }, { "epoch": 0.7685052286241542, "grad_norm": 1.1685030460357666, "learning_rate": 2.682186347146456e-06, "loss": 0.9116, "step": 3748 }, { "epoch": 0.7687102727086323, "grad_norm": 1.2929961681365967, "learning_rate": 2.6776612821740476e-06, "loss": 0.9239, "step": 3749 }, { "epoch": 0.7689153167931105, "grad_norm": 1.2536662817001343, "learning_rate": 2.6731394473856464e-06, "loss": 0.8346, "step": 3750 }, { "epoch": 0.7691203608775887, "grad_norm": 1.2519727945327759, "learning_rate": 2.66862084477602e-06, "loss": 0.846, "step": 3751 }, { "epoch": 0.7693254049620668, "grad_norm": 1.2825236320495605, "learning_rate": 2.6641054763385044e-06, "loss": 0.8193, "step": 3752 }, { "epoch": 0.769530449046545, "grad_norm": 1.2934610843658447, "learning_rate": 2.6595933440650224e-06, "loss": 0.847, "step": 3753 }, { "epoch": 0.7697354931310232, "grad_norm": 1.245739459991455, "learning_rate": 2.6550844499460558e-06, "loss": 0.8511, "step": 3754 }, { "epoch": 0.7699405372155014, "grad_norm": 1.2633453607559204, "learning_rate": 2.6505787959706607e-06, "loss": 0.8553, "step": 3755 }, { "epoch": 0.7701455812999795, "grad_norm": 1.2795073986053467, "learning_rate": 2.6460763841264746e-06, "loss": 0.8681, "step": 3756 }, { "epoch": 0.7703506253844576, "grad_norm": 1.2566053867340088, "learning_rate": 2.6415772163996845e-06, "loss": 0.891, "step": 3757 }, { "epoch": 0.7705556694689358, "grad_norm": 1.3206818103790283, "learning_rate": 2.6370812947750667e-06, "loss": 0.9767, "step": 3758 }, { "epoch": 0.770760713553414, "grad_norm": 1.4019607305526733, "learning_rate": 2.6325886212359496e-06, "loss": 0.8249, "step": 3759 }, { "epoch": 0.7709657576378921, "grad_norm": 1.3134404420852661, "learning_rate": 2.6280991977642467e-06, "loss": 0.9552, "step": 3760 }, { "epoch": 0.7711708017223703, "grad_norm": 1.3876452445983887, "learning_rate": 2.6236130263404134e-06, "loss": 0.8921, "step": 3761 }, { "epoch": 0.7713758458068485, "grad_norm": 1.3491662740707397, "learning_rate": 2.619130108943494e-06, "loss": 0.8213, "step": 3762 }, { "epoch": 0.7715808898913267, "grad_norm": 1.3282297849655151, "learning_rate": 2.61465044755108e-06, "loss": 0.9106, "step": 3763 }, { "epoch": 0.7717859339758047, "grad_norm": 1.2400685548782349, "learning_rate": 2.610174044139342e-06, "loss": 0.8509, "step": 3764 }, { "epoch": 0.7719909780602829, "grad_norm": 1.1866544485092163, "learning_rate": 2.6057009006829993e-06, "loss": 0.8561, "step": 3765 }, { "epoch": 0.7721960221447611, "grad_norm": 1.2269234657287598, "learning_rate": 2.6012310191553413e-06, "loss": 0.9099, "step": 3766 }, { "epoch": 0.7724010662292393, "grad_norm": 1.3059037923812866, "learning_rate": 2.5967644015282146e-06, "loss": 0.8878, "step": 3767 }, { "epoch": 0.7726061103137174, "grad_norm": 1.2577251195907593, "learning_rate": 2.5923010497720257e-06, "loss": 0.9626, "step": 3768 }, { "epoch": 0.7728111543981956, "grad_norm": 1.2811516523361206, "learning_rate": 2.5878409658557467e-06, "loss": 0.8312, "step": 3769 }, { "epoch": 0.7730161984826738, "grad_norm": 1.3476508855819702, "learning_rate": 2.5833841517469025e-06, "loss": 0.8864, "step": 3770 }, { "epoch": 0.773221242567152, "grad_norm": 1.2305327653884888, "learning_rate": 2.578930609411575e-06, "loss": 0.924, "step": 3771 }, { "epoch": 0.77342628665163, "grad_norm": 1.205204963684082, "learning_rate": 2.5744803408144026e-06, "loss": 0.8765, "step": 3772 }, { "epoch": 0.7736313307361082, "grad_norm": 1.1699508428573608, "learning_rate": 2.570033347918587e-06, "loss": 0.8295, "step": 3773 }, { "epoch": 0.7738363748205864, "grad_norm": 1.2704805135726929, "learning_rate": 2.5655896326858753e-06, "loss": 0.8718, "step": 3774 }, { "epoch": 0.7740414189050646, "grad_norm": 1.2380712032318115, "learning_rate": 2.5611491970765735e-06, "loss": 0.9323, "step": 3775 }, { "epoch": 0.7742464629895428, "grad_norm": 1.4332027435302734, "learning_rate": 2.5567120430495372e-06, "loss": 0.9003, "step": 3776 }, { "epoch": 0.7744515070740209, "grad_norm": 1.2907112836837769, "learning_rate": 2.5522781725621814e-06, "loss": 0.9071, "step": 3777 }, { "epoch": 0.7746565511584991, "grad_norm": 1.3394410610198975, "learning_rate": 2.5478475875704677e-06, "loss": 0.8641, "step": 3778 }, { "epoch": 0.7748615952429773, "grad_norm": 1.2984340190887451, "learning_rate": 2.5434202900289073e-06, "loss": 0.9388, "step": 3779 }, { "epoch": 0.7750666393274555, "grad_norm": 1.3811111450195312, "learning_rate": 2.53899628189056e-06, "loss": 0.8779, "step": 3780 }, { "epoch": 0.7752716834119335, "grad_norm": 1.1560752391815186, "learning_rate": 2.5345755651070447e-06, "loss": 0.8192, "step": 3781 }, { "epoch": 0.7754767274964117, "grad_norm": 1.2925165891647339, "learning_rate": 2.530158141628515e-06, "loss": 0.8818, "step": 3782 }, { "epoch": 0.7756817715808899, "grad_norm": 1.2651573419570923, "learning_rate": 2.5257440134036803e-06, "loss": 0.8907, "step": 3783 }, { "epoch": 0.7758868156653681, "grad_norm": 1.449977159500122, "learning_rate": 2.5213331823797894e-06, "loss": 0.9101, "step": 3784 }, { "epoch": 0.7760918597498462, "grad_norm": 1.2676578760147095, "learning_rate": 2.5169256505026464e-06, "loss": 0.9031, "step": 3785 }, { "epoch": 0.7762969038343244, "grad_norm": 1.2813019752502441, "learning_rate": 2.5125214197165924e-06, "loss": 0.8614, "step": 3786 }, { "epoch": 0.7765019479188026, "grad_norm": 1.2049567699432373, "learning_rate": 2.508120491964512e-06, "loss": 0.8655, "step": 3787 }, { "epoch": 0.7767069920032807, "grad_norm": 1.3081886768341064, "learning_rate": 2.5037228691878424e-06, "loss": 0.9541, "step": 3788 }, { "epoch": 0.7769120360877588, "grad_norm": 1.217280387878418, "learning_rate": 2.499328553326543e-06, "loss": 0.8965, "step": 3789 }, { "epoch": 0.777117080172237, "grad_norm": 1.153335452079773, "learning_rate": 2.4949375463191384e-06, "loss": 0.8668, "step": 3790 }, { "epoch": 0.7773221242567152, "grad_norm": 1.2844960689544678, "learning_rate": 2.490549850102674e-06, "loss": 0.8883, "step": 3791 }, { "epoch": 0.7775271683411934, "grad_norm": 1.1763951778411865, "learning_rate": 2.486165466612751e-06, "loss": 0.8275, "step": 3792 }, { "epoch": 0.7777322124256715, "grad_norm": 1.1993674039840698, "learning_rate": 2.48178439778349e-06, "loss": 0.9267, "step": 3793 }, { "epoch": 0.7779372565101497, "grad_norm": 1.3037643432617188, "learning_rate": 2.4774066455475687e-06, "loss": 0.9702, "step": 3794 }, { "epoch": 0.7781423005946279, "grad_norm": 1.294675588607788, "learning_rate": 2.4730322118361883e-06, "loss": 0.8502, "step": 3795 }, { "epoch": 0.778347344679106, "grad_norm": 1.203118920326233, "learning_rate": 2.468661098579096e-06, "loss": 0.9328, "step": 3796 }, { "epoch": 0.7785523887635841, "grad_norm": 1.176822543144226, "learning_rate": 2.464293307704566e-06, "loss": 0.8155, "step": 3797 }, { "epoch": 0.7787574328480623, "grad_norm": 1.1620972156524658, "learning_rate": 2.4599288411394105e-06, "loss": 0.801, "step": 3798 }, { "epoch": 0.7789624769325405, "grad_norm": 1.2033342123031616, "learning_rate": 2.455567700808974e-06, "loss": 0.8496, "step": 3799 }, { "epoch": 0.7791675210170187, "grad_norm": 1.232791543006897, "learning_rate": 2.451209888637133e-06, "loss": 0.8853, "step": 3800 }, { "epoch": 0.7793725651014968, "grad_norm": 1.2864437103271484, "learning_rate": 2.446855406546301e-06, "loss": 0.8886, "step": 3801 }, { "epoch": 0.779577609185975, "grad_norm": 1.1922428607940674, "learning_rate": 2.4425042564574186e-06, "loss": 0.8933, "step": 3802 }, { "epoch": 0.7797826532704532, "grad_norm": 1.2049601078033447, "learning_rate": 2.438156440289954e-06, "loss": 0.8518, "step": 3803 }, { "epoch": 0.7799876973549313, "grad_norm": 1.2633112668991089, "learning_rate": 2.4338119599619048e-06, "loss": 0.8017, "step": 3804 }, { "epoch": 0.7801927414394094, "grad_norm": 1.1868689060211182, "learning_rate": 2.4294708173898064e-06, "loss": 0.8829, "step": 3805 }, { "epoch": 0.7803977855238876, "grad_norm": 1.4285414218902588, "learning_rate": 2.425133014488712e-06, "loss": 0.8606, "step": 3806 }, { "epoch": 0.7806028296083658, "grad_norm": 1.28708016872406, "learning_rate": 2.4207985531722034e-06, "loss": 0.9205, "step": 3807 }, { "epoch": 0.780807873692844, "grad_norm": 1.3372257947921753, "learning_rate": 2.4164674353523867e-06, "loss": 0.8905, "step": 3808 }, { "epoch": 0.7810129177773221, "grad_norm": 1.2467073202133179, "learning_rate": 2.412139662939902e-06, "loss": 0.8618, "step": 3809 }, { "epoch": 0.7812179618618003, "grad_norm": 1.2795199155807495, "learning_rate": 2.4078152378439033e-06, "loss": 0.9217, "step": 3810 }, { "epoch": 0.7814230059462784, "grad_norm": 1.3113722801208496, "learning_rate": 2.403494161972073e-06, "loss": 0.8799, "step": 3811 }, { "epoch": 0.7816280500307566, "grad_norm": 1.2183735370635986, "learning_rate": 2.3991764372306113e-06, "loss": 0.9263, "step": 3812 }, { "epoch": 0.7818330941152348, "grad_norm": 1.2222412824630737, "learning_rate": 2.3948620655242495e-06, "loss": 0.8768, "step": 3813 }, { "epoch": 0.7820381381997129, "grad_norm": 1.2536619901657104, "learning_rate": 2.390551048756232e-06, "loss": 0.856, "step": 3814 }, { "epoch": 0.7822431822841911, "grad_norm": 1.44545578956604, "learning_rate": 2.3862433888283245e-06, "loss": 0.8932, "step": 3815 }, { "epoch": 0.7824482263686693, "grad_norm": 1.1871587038040161, "learning_rate": 2.381939087640813e-06, "loss": 0.8479, "step": 3816 }, { "epoch": 0.7826532704531475, "grad_norm": 1.2891589403152466, "learning_rate": 2.377638147092497e-06, "loss": 0.8123, "step": 3817 }, { "epoch": 0.7828583145376256, "grad_norm": 1.2780588865280151, "learning_rate": 2.3733405690807056e-06, "loss": 0.7668, "step": 3818 }, { "epoch": 0.7830633586221037, "grad_norm": 1.2687978744506836, "learning_rate": 2.369046355501272e-06, "loss": 0.8704, "step": 3819 }, { "epoch": 0.7832684027065819, "grad_norm": 1.3267202377319336, "learning_rate": 2.364755508248552e-06, "loss": 0.9078, "step": 3820 }, { "epoch": 0.7834734467910601, "grad_norm": 1.4145315885543823, "learning_rate": 2.360468029215409e-06, "loss": 0.9668, "step": 3821 }, { "epoch": 0.7836784908755382, "grad_norm": 1.316096544265747, "learning_rate": 2.3561839202932344e-06, "loss": 0.9037, "step": 3822 }, { "epoch": 0.7838835349600164, "grad_norm": 1.1947910785675049, "learning_rate": 2.351903183371916e-06, "loss": 0.833, "step": 3823 }, { "epoch": 0.7840885790444946, "grad_norm": 1.1301954984664917, "learning_rate": 2.347625820339874e-06, "loss": 0.8794, "step": 3824 }, { "epoch": 0.7842936231289728, "grad_norm": 1.2550384998321533, "learning_rate": 2.343351833084014e-06, "loss": 0.9268, "step": 3825 }, { "epoch": 0.7844986672134509, "grad_norm": 1.109318733215332, "learning_rate": 2.3390812234897777e-06, "loss": 0.8468, "step": 3826 }, { "epoch": 0.784703711297929, "grad_norm": 1.23818838596344, "learning_rate": 2.3348139934411008e-06, "loss": 0.9532, "step": 3827 }, { "epoch": 0.7849087553824072, "grad_norm": 1.1462466716766357, "learning_rate": 2.3305501448204405e-06, "loss": 0.8183, "step": 3828 }, { "epoch": 0.7851137994668854, "grad_norm": 1.1971650123596191, "learning_rate": 2.326289679508744e-06, "loss": 0.8752, "step": 3829 }, { "epoch": 0.7853188435513635, "grad_norm": 1.273254156112671, "learning_rate": 2.3220325993854887e-06, "loss": 0.9054, "step": 3830 }, { "epoch": 0.7855238876358417, "grad_norm": 1.410770058631897, "learning_rate": 2.3177789063286416e-06, "loss": 0.8828, "step": 3831 }, { "epoch": 0.7857289317203199, "grad_norm": 1.1955965757369995, "learning_rate": 2.3135286022146785e-06, "loss": 0.8638, "step": 3832 }, { "epoch": 0.7859339758047981, "grad_norm": 1.2042236328125, "learning_rate": 2.30928168891859e-06, "loss": 0.8963, "step": 3833 }, { "epoch": 0.7861390198892761, "grad_norm": 1.321717619895935, "learning_rate": 2.30503816831386e-06, "loss": 0.9076, "step": 3834 }, { "epoch": 0.7863440639737543, "grad_norm": 1.1218639612197876, "learning_rate": 2.3007980422724807e-06, "loss": 0.834, "step": 3835 }, { "epoch": 0.7865491080582325, "grad_norm": 1.2646631002426147, "learning_rate": 2.296561312664941e-06, "loss": 0.8519, "step": 3836 }, { "epoch": 0.7867541521427107, "grad_norm": 1.3506234884262085, "learning_rate": 2.292327981360245e-06, "loss": 0.9814, "step": 3837 }, { "epoch": 0.7869591962271888, "grad_norm": 1.2517257928848267, "learning_rate": 2.2880980502258788e-06, "loss": 0.8996, "step": 3838 }, { "epoch": 0.787164240311667, "grad_norm": 1.237841010093689, "learning_rate": 2.2838715211278463e-06, "loss": 0.9043, "step": 3839 }, { "epoch": 0.7873692843961452, "grad_norm": 1.18500816822052, "learning_rate": 2.279648395930637e-06, "loss": 0.8864, "step": 3840 }, { "epoch": 0.7875743284806234, "grad_norm": 1.2440752983093262, "learning_rate": 2.2754286764972512e-06, "loss": 0.8068, "step": 3841 }, { "epoch": 0.7877793725651014, "grad_norm": 1.261377215385437, "learning_rate": 2.271212364689176e-06, "loss": 0.8543, "step": 3842 }, { "epoch": 0.7879844166495796, "grad_norm": 1.2419016361236572, "learning_rate": 2.2669994623664006e-06, "loss": 0.8975, "step": 3843 }, { "epoch": 0.7881894607340578, "grad_norm": 1.3120781183242798, "learning_rate": 2.262789971387405e-06, "loss": 0.9124, "step": 3844 }, { "epoch": 0.788394504818536, "grad_norm": 1.2331372499465942, "learning_rate": 2.2585838936091753e-06, "loss": 0.8886, "step": 3845 }, { "epoch": 0.7885995489030142, "grad_norm": 1.265372395515442, "learning_rate": 2.2543812308871826e-06, "loss": 0.8681, "step": 3846 }, { "epoch": 0.7888045929874923, "grad_norm": 1.2652928829193115, "learning_rate": 2.2501819850753925e-06, "loss": 0.9014, "step": 3847 }, { "epoch": 0.7890096370719705, "grad_norm": 1.2377996444702148, "learning_rate": 2.245986158026264e-06, "loss": 0.9011, "step": 3848 }, { "epoch": 0.7892146811564487, "grad_norm": 1.3688430786132812, "learning_rate": 2.2417937515907485e-06, "loss": 0.9213, "step": 3849 }, { "epoch": 0.7894197252409269, "grad_norm": 1.2344855070114136, "learning_rate": 2.237604767618291e-06, "loss": 0.9077, "step": 3850 }, { "epoch": 0.7896247693254049, "grad_norm": 1.2471187114715576, "learning_rate": 2.233419207956824e-06, "loss": 0.8476, "step": 3851 }, { "epoch": 0.7898298134098831, "grad_norm": 1.1257601976394653, "learning_rate": 2.229237074452768e-06, "loss": 0.8397, "step": 3852 }, { "epoch": 0.7900348574943613, "grad_norm": 1.3021090030670166, "learning_rate": 2.2250583689510306e-06, "loss": 0.8709, "step": 3853 }, { "epoch": 0.7902399015788395, "grad_norm": 1.2083818912506104, "learning_rate": 2.2208830932950175e-06, "loss": 0.8862, "step": 3854 }, { "epoch": 0.7904449456633176, "grad_norm": 1.2065297365188599, "learning_rate": 2.2167112493266095e-06, "loss": 0.8348, "step": 3855 }, { "epoch": 0.7906499897477958, "grad_norm": 1.2413607835769653, "learning_rate": 2.212542838886179e-06, "loss": 0.8747, "step": 3856 }, { "epoch": 0.790855033832274, "grad_norm": 1.2545127868652344, "learning_rate": 2.2083778638125796e-06, "loss": 0.7996, "step": 3857 }, { "epoch": 0.7910600779167521, "grad_norm": 1.2102961540222168, "learning_rate": 2.204216325943159e-06, "loss": 0.8153, "step": 3858 }, { "epoch": 0.7912651220012302, "grad_norm": 1.1904897689819336, "learning_rate": 2.200058227113736e-06, "loss": 0.9032, "step": 3859 }, { "epoch": 0.7914701660857084, "grad_norm": 1.282365322113037, "learning_rate": 2.1959035691586274e-06, "loss": 0.8306, "step": 3860 }, { "epoch": 0.7916752101701866, "grad_norm": 1.3154692649841309, "learning_rate": 2.1917523539106123e-06, "loss": 0.9385, "step": 3861 }, { "epoch": 0.7918802542546648, "grad_norm": 1.2700780630111694, "learning_rate": 2.1876045832009694e-06, "loss": 0.7849, "step": 3862 }, { "epoch": 0.7920852983391429, "grad_norm": 1.2194796800613403, "learning_rate": 2.1834602588594487e-06, "loss": 0.861, "step": 3863 }, { "epoch": 0.7922903424236211, "grad_norm": 1.4714125394821167, "learning_rate": 2.179319382714281e-06, "loss": 0.9142, "step": 3864 }, { "epoch": 0.7924953865080993, "grad_norm": 1.2870850563049316, "learning_rate": 2.1751819565921774e-06, "loss": 0.926, "step": 3865 }, { "epoch": 0.7927004305925774, "grad_norm": 1.273467779159546, "learning_rate": 2.171047982318324e-06, "loss": 0.9226, "step": 3866 }, { "epoch": 0.7929054746770555, "grad_norm": 1.314491629600525, "learning_rate": 2.16691746171639e-06, "loss": 0.8459, "step": 3867 }, { "epoch": 0.7931105187615337, "grad_norm": 1.2570064067840576, "learning_rate": 2.162790396608513e-06, "loss": 0.8489, "step": 3868 }, { "epoch": 0.7933155628460119, "grad_norm": 1.1494086980819702, "learning_rate": 2.158666788815319e-06, "loss": 0.9124, "step": 3869 }, { "epoch": 0.7935206069304901, "grad_norm": 1.2878623008728027, "learning_rate": 2.15454664015589e-06, "loss": 0.8926, "step": 3870 }, { "epoch": 0.7937256510149682, "grad_norm": 1.278631567955017, "learning_rate": 2.1504299524478013e-06, "loss": 0.9354, "step": 3871 }, { "epoch": 0.7939306950994464, "grad_norm": 1.193702220916748, "learning_rate": 2.1463167275070863e-06, "loss": 0.8623, "step": 3872 }, { "epoch": 0.7941357391839245, "grad_norm": 1.3185451030731201, "learning_rate": 2.1422069671482646e-06, "loss": 0.9366, "step": 3873 }, { "epoch": 0.7943407832684027, "grad_norm": 1.2505884170532227, "learning_rate": 2.138100673184311e-06, "loss": 0.8729, "step": 3874 }, { "epoch": 0.7945458273528808, "grad_norm": 1.2307775020599365, "learning_rate": 2.1339978474266875e-06, "loss": 0.865, "step": 3875 }, { "epoch": 0.794750871437359, "grad_norm": 1.2259652614593506, "learning_rate": 2.1298984916853126e-06, "loss": 0.8088, "step": 3876 }, { "epoch": 0.7949559155218372, "grad_norm": 1.2222260236740112, "learning_rate": 2.125802607768588e-06, "loss": 0.9176, "step": 3877 }, { "epoch": 0.7951609596063154, "grad_norm": 1.1922539472579956, "learning_rate": 2.1217101974833708e-06, "loss": 0.8522, "step": 3878 }, { "epoch": 0.7953660036907935, "grad_norm": 1.2590996026992798, "learning_rate": 2.1176212626349936e-06, "loss": 0.8795, "step": 3879 }, { "epoch": 0.7955710477752717, "grad_norm": 1.23446786403656, "learning_rate": 2.1135358050272515e-06, "loss": 0.8787, "step": 3880 }, { "epoch": 0.7957760918597498, "grad_norm": 1.1716387271881104, "learning_rate": 2.1094538264624053e-06, "loss": 0.7975, "step": 3881 }, { "epoch": 0.795981135944228, "grad_norm": 1.2195802927017212, "learning_rate": 2.1053753287411895e-06, "loss": 0.8388, "step": 3882 }, { "epoch": 0.7961861800287062, "grad_norm": 1.3204017877578735, "learning_rate": 2.1013003136627917e-06, "loss": 0.8307, "step": 3883 }, { "epoch": 0.7963912241131843, "grad_norm": 1.3666425943374634, "learning_rate": 2.0972287830248705e-06, "loss": 0.9013, "step": 3884 }, { "epoch": 0.7965962681976625, "grad_norm": 1.2570346593856812, "learning_rate": 2.093160738623541e-06, "loss": 0.844, "step": 3885 }, { "epoch": 0.7968013122821407, "grad_norm": 1.3597317934036255, "learning_rate": 2.0890961822533916e-06, "loss": 0.9261, "step": 3886 }, { "epoch": 0.7970063563666189, "grad_norm": 1.2737830877304077, "learning_rate": 2.08503511570746e-06, "loss": 0.8926, "step": 3887 }, { "epoch": 0.797211400451097, "grad_norm": 1.3651320934295654, "learning_rate": 2.0809775407772505e-06, "loss": 0.8296, "step": 3888 }, { "epoch": 0.7974164445355751, "grad_norm": 1.290639042854309, "learning_rate": 2.0769234592527233e-06, "loss": 0.8537, "step": 3889 }, { "epoch": 0.7976214886200533, "grad_norm": 1.3256486654281616, "learning_rate": 2.0728728729223057e-06, "loss": 0.8668, "step": 3890 }, { "epoch": 0.7978265327045315, "grad_norm": 1.2202842235565186, "learning_rate": 2.068825783572873e-06, "loss": 0.8478, "step": 3891 }, { "epoch": 0.7980315767890096, "grad_norm": 1.2511564493179321, "learning_rate": 2.064782192989765e-06, "loss": 0.9644, "step": 3892 }, { "epoch": 0.7982366208734878, "grad_norm": 1.3087095022201538, "learning_rate": 2.06074210295677e-06, "loss": 0.9515, "step": 3893 }, { "epoch": 0.798441664957966, "grad_norm": 1.2429240942001343, "learning_rate": 2.0567055152561444e-06, "loss": 0.8439, "step": 3894 }, { "epoch": 0.7986467090424442, "grad_norm": 1.2999225854873657, "learning_rate": 2.0526724316685897e-06, "loss": 0.8714, "step": 3895 }, { "epoch": 0.7988517531269222, "grad_norm": 1.2437423467636108, "learning_rate": 2.048642853973265e-06, "loss": 0.9049, "step": 3896 }, { "epoch": 0.7990567972114004, "grad_norm": 1.2648805379867554, "learning_rate": 2.0446167839477815e-06, "loss": 0.8991, "step": 3897 }, { "epoch": 0.7992618412958786, "grad_norm": 1.2524532079696655, "learning_rate": 2.0405942233682017e-06, "loss": 0.8509, "step": 3898 }, { "epoch": 0.7994668853803568, "grad_norm": 1.2229747772216797, "learning_rate": 2.0365751740090476e-06, "loss": 0.9216, "step": 3899 }, { "epoch": 0.7996719294648349, "grad_norm": 1.2333812713623047, "learning_rate": 2.0325596376432832e-06, "loss": 0.8554, "step": 3900 }, { "epoch": 0.7998769735493131, "grad_norm": 1.3864216804504395, "learning_rate": 2.0285476160423267e-06, "loss": 0.9099, "step": 3901 }, { "epoch": 0.8000820176337913, "grad_norm": 1.2692551612854004, "learning_rate": 2.0245391109760437e-06, "loss": 0.9445, "step": 3902 }, { "epoch": 0.8002870617182695, "grad_norm": 1.2513163089752197, "learning_rate": 2.0205341242127553e-06, "loss": 0.8974, "step": 3903 }, { "epoch": 0.8004921058027475, "grad_norm": 1.2377612590789795, "learning_rate": 2.016532657519218e-06, "loss": 0.9125, "step": 3904 }, { "epoch": 0.8006971498872257, "grad_norm": 1.2909266948699951, "learning_rate": 2.012534712660653e-06, "loss": 0.8916, "step": 3905 }, { "epoch": 0.8009021939717039, "grad_norm": 1.277141809463501, "learning_rate": 2.008540291400708e-06, "loss": 0.8542, "step": 3906 }, { "epoch": 0.8011072380561821, "grad_norm": 1.273547887802124, "learning_rate": 2.0045493955014915e-06, "loss": 0.7853, "step": 3907 }, { "epoch": 0.8013122821406602, "grad_norm": 1.255210041999817, "learning_rate": 2.0005620267235483e-06, "loss": 0.9421, "step": 3908 }, { "epoch": 0.8015173262251384, "grad_norm": 1.2552274465560913, "learning_rate": 1.996578186825876e-06, "loss": 0.9479, "step": 3909 }, { "epoch": 0.8017223703096166, "grad_norm": 1.4597046375274658, "learning_rate": 1.9925978775659015e-06, "loss": 0.8816, "step": 3910 }, { "epoch": 0.8019274143940948, "grad_norm": 1.3096671104431152, "learning_rate": 1.9886211006995092e-06, "loss": 0.9198, "step": 3911 }, { "epoch": 0.8021324584785728, "grad_norm": 1.3683110475540161, "learning_rate": 1.984647857981017e-06, "loss": 0.9123, "step": 3912 }, { "epoch": 0.802337502563051, "grad_norm": 1.256894826889038, "learning_rate": 1.9806781511631813e-06, "loss": 0.8559, "step": 3913 }, { "epoch": 0.8025425466475292, "grad_norm": 1.2059831619262695, "learning_rate": 1.976711981997208e-06, "loss": 0.9606, "step": 3914 }, { "epoch": 0.8027475907320074, "grad_norm": 1.2592405080795288, "learning_rate": 1.9727493522327355e-06, "loss": 0.9111, "step": 3915 }, { "epoch": 0.8029526348164856, "grad_norm": 1.3161516189575195, "learning_rate": 1.968790263617841e-06, "loss": 0.8121, "step": 3916 }, { "epoch": 0.8031576789009637, "grad_norm": 1.3377690315246582, "learning_rate": 1.96483471789904e-06, "loss": 0.847, "step": 3917 }, { "epoch": 0.8033627229854419, "grad_norm": 1.161454677581787, "learning_rate": 1.9608827168212917e-06, "loss": 0.853, "step": 3918 }, { "epoch": 0.8035677670699201, "grad_norm": 1.3203786611557007, "learning_rate": 1.9569342621279776e-06, "loss": 0.9318, "step": 3919 }, { "epoch": 0.8037728111543982, "grad_norm": 1.2713059186935425, "learning_rate": 1.952989355560929e-06, "loss": 0.8807, "step": 3920 }, { "epoch": 0.8039778552388763, "grad_norm": 1.3214704990386963, "learning_rate": 1.9490479988604027e-06, "loss": 0.9614, "step": 3921 }, { "epoch": 0.8041828993233545, "grad_norm": 1.3095974922180176, "learning_rate": 1.9451101937650963e-06, "loss": 0.8593, "step": 3922 }, { "epoch": 0.8043879434078327, "grad_norm": 1.268154501914978, "learning_rate": 1.9411759420121358e-06, "loss": 0.9026, "step": 3923 }, { "epoch": 0.8045929874923109, "grad_norm": 1.1879699230194092, "learning_rate": 1.9372452453370793e-06, "loss": 0.8831, "step": 3924 }, { "epoch": 0.804798031576789, "grad_norm": 1.4408107995986938, "learning_rate": 1.933318105473918e-06, "loss": 0.8903, "step": 3925 }, { "epoch": 0.8050030756612672, "grad_norm": 1.2656053304672241, "learning_rate": 1.929394524155079e-06, "loss": 0.9081, "step": 3926 }, { "epoch": 0.8052081197457454, "grad_norm": 1.1766343116760254, "learning_rate": 1.925474503111412e-06, "loss": 0.8978, "step": 3927 }, { "epoch": 0.8054131638302235, "grad_norm": 1.180735468864441, "learning_rate": 1.9215580440721983e-06, "loss": 0.8694, "step": 3928 }, { "epoch": 0.8056182079147016, "grad_norm": 1.1903822422027588, "learning_rate": 1.9176451487651494e-06, "loss": 0.8597, "step": 3929 }, { "epoch": 0.8058232519991798, "grad_norm": 1.2990835905075073, "learning_rate": 1.9137358189164025e-06, "loss": 0.9303, "step": 3930 }, { "epoch": 0.806028296083658, "grad_norm": 1.1676799058914185, "learning_rate": 1.9098300562505266e-06, "loss": 0.9061, "step": 3931 }, { "epoch": 0.8062333401681362, "grad_norm": 1.1146292686462402, "learning_rate": 1.905927862490512e-06, "loss": 0.829, "step": 3932 }, { "epoch": 0.8064383842526143, "grad_norm": 1.3141109943389893, "learning_rate": 1.9020292393577766e-06, "loss": 0.9152, "step": 3933 }, { "epoch": 0.8066434283370925, "grad_norm": 1.2367826700210571, "learning_rate": 1.8981341885721617e-06, "loss": 0.9238, "step": 3934 }, { "epoch": 0.8068484724215707, "grad_norm": 1.243607521057129, "learning_rate": 1.8942427118519369e-06, "loss": 0.8539, "step": 3935 }, { "epoch": 0.8070535165060488, "grad_norm": 1.2858173847198486, "learning_rate": 1.8903548109137903e-06, "loss": 0.9186, "step": 3936 }, { "epoch": 0.8072585605905269, "grad_norm": 1.2410961389541626, "learning_rate": 1.8864704874728346e-06, "loss": 0.7954, "step": 3937 }, { "epoch": 0.8074636046750051, "grad_norm": 1.2116707563400269, "learning_rate": 1.882589743242602e-06, "loss": 0.8922, "step": 3938 }, { "epoch": 0.8076686487594833, "grad_norm": 1.3279207944869995, "learning_rate": 1.8787125799350535e-06, "loss": 0.8839, "step": 3939 }, { "epoch": 0.8078736928439615, "grad_norm": 1.1954079866409302, "learning_rate": 1.8748389992605598e-06, "loss": 0.9284, "step": 3940 }, { "epoch": 0.8080787369284396, "grad_norm": 1.274342656135559, "learning_rate": 1.8709690029279226e-06, "loss": 0.9471, "step": 3941 }, { "epoch": 0.8082837810129178, "grad_norm": 1.3363291025161743, "learning_rate": 1.8671025926443464e-06, "loss": 0.8824, "step": 3942 }, { "epoch": 0.808488825097396, "grad_norm": 1.2623683214187622, "learning_rate": 1.8632397701154714e-06, "loss": 0.9146, "step": 3943 }, { "epoch": 0.8086938691818741, "grad_norm": 1.1428691148757935, "learning_rate": 1.8593805370453444e-06, "loss": 0.8375, "step": 3944 }, { "epoch": 0.8088989132663522, "grad_norm": 1.2955725193023682, "learning_rate": 1.8555248951364325e-06, "loss": 0.9503, "step": 3945 }, { "epoch": 0.8091039573508304, "grad_norm": 1.205072283744812, "learning_rate": 1.851672846089615e-06, "loss": 0.836, "step": 3946 }, { "epoch": 0.8093090014353086, "grad_norm": 1.2662066221237183, "learning_rate": 1.8478243916041882e-06, "loss": 0.9081, "step": 3947 }, { "epoch": 0.8095140455197868, "grad_norm": 1.3836727142333984, "learning_rate": 1.8439795333778676e-06, "loss": 0.8911, "step": 3948 }, { "epoch": 0.8097190896042649, "grad_norm": 1.2696387767791748, "learning_rate": 1.840138273106774e-06, "loss": 0.8459, "step": 3949 }, { "epoch": 0.809924133688743, "grad_norm": 1.2668991088867188, "learning_rate": 1.836300612485451e-06, "loss": 0.8859, "step": 3950 }, { "epoch": 0.8101291777732212, "grad_norm": 1.3586567640304565, "learning_rate": 1.8324665532068386e-06, "loss": 0.9237, "step": 3951 }, { "epoch": 0.8103342218576994, "grad_norm": 1.2655012607574463, "learning_rate": 1.828636096962304e-06, "loss": 0.8319, "step": 3952 }, { "epoch": 0.8105392659421776, "grad_norm": 1.3385515213012695, "learning_rate": 1.8248092454416166e-06, "loss": 0.9948, "step": 3953 }, { "epoch": 0.8107443100266557, "grad_norm": 1.2293331623077393, "learning_rate": 1.8209860003329594e-06, "loss": 0.8825, "step": 3954 }, { "epoch": 0.8109493541111339, "grad_norm": 1.7643067836761475, "learning_rate": 1.817166363322922e-06, "loss": 0.8908, "step": 3955 }, { "epoch": 0.8111543981956121, "grad_norm": 1.2381948232650757, "learning_rate": 1.8133503360965032e-06, "loss": 0.83, "step": 3956 }, { "epoch": 0.8113594422800903, "grad_norm": 1.2395051717758179, "learning_rate": 1.8095379203371044e-06, "loss": 0.9261, "step": 3957 }, { "epoch": 0.8115644863645683, "grad_norm": 1.159805417060852, "learning_rate": 1.8057291177265467e-06, "loss": 0.9243, "step": 3958 }, { "epoch": 0.8117695304490465, "grad_norm": 1.1959946155548096, "learning_rate": 1.8019239299450441e-06, "loss": 0.8591, "step": 3959 }, { "epoch": 0.8119745745335247, "grad_norm": 1.1946672201156616, "learning_rate": 1.7981223586712226e-06, "loss": 0.8465, "step": 3960 }, { "epoch": 0.8121796186180029, "grad_norm": 1.3509917259216309, "learning_rate": 1.7943244055821107e-06, "loss": 0.9924, "step": 3961 }, { "epoch": 0.812384662702481, "grad_norm": 1.2982062101364136, "learning_rate": 1.7905300723531393e-06, "loss": 0.8744, "step": 3962 }, { "epoch": 0.8125897067869592, "grad_norm": 1.313735842704773, "learning_rate": 1.7867393606581484e-06, "loss": 0.8734, "step": 3963 }, { "epoch": 0.8127947508714374, "grad_norm": 1.2446980476379395, "learning_rate": 1.7829522721693738e-06, "loss": 0.9598, "step": 3964 }, { "epoch": 0.8129997949559156, "grad_norm": 1.2092759609222412, "learning_rate": 1.7791688085574576e-06, "loss": 0.9003, "step": 3965 }, { "epoch": 0.8132048390403936, "grad_norm": 1.236019492149353, "learning_rate": 1.7753889714914363e-06, "loss": 0.9036, "step": 3966 }, { "epoch": 0.8134098831248718, "grad_norm": 1.231948733329773, "learning_rate": 1.771612762638758e-06, "loss": 0.8268, "step": 3967 }, { "epoch": 0.81361492720935, "grad_norm": 1.2347005605697632, "learning_rate": 1.7678401836652582e-06, "loss": 0.9339, "step": 3968 }, { "epoch": 0.8138199712938282, "grad_norm": 1.251940369606018, "learning_rate": 1.7640712362351785e-06, "loss": 0.9412, "step": 3969 }, { "epoch": 0.8140250153783063, "grad_norm": 1.2777292728424072, "learning_rate": 1.7603059220111529e-06, "loss": 0.7911, "step": 3970 }, { "epoch": 0.8142300594627845, "grad_norm": 1.2969791889190674, "learning_rate": 1.7565442426542212e-06, "loss": 0.9045, "step": 3971 }, { "epoch": 0.8144351035472627, "grad_norm": 1.3837913274765015, "learning_rate": 1.7527861998238094e-06, "loss": 0.8712, "step": 3972 }, { "epoch": 0.8146401476317409, "grad_norm": 1.2601263523101807, "learning_rate": 1.7490317951777513e-06, "loss": 0.9592, "step": 3973 }, { "epoch": 0.8148451917162189, "grad_norm": 1.3427950143814087, "learning_rate": 1.74528103037226e-06, "loss": 0.9089, "step": 3974 }, { "epoch": 0.8150502358006971, "grad_norm": 1.1745439767837524, "learning_rate": 1.7415339070619586e-06, "loss": 0.9136, "step": 3975 }, { "epoch": 0.8152552798851753, "grad_norm": 1.2749539613723755, "learning_rate": 1.7377904268998536e-06, "loss": 0.916, "step": 3976 }, { "epoch": 0.8154603239696535, "grad_norm": 1.157657504081726, "learning_rate": 1.7340505915373495e-06, "loss": 0.7939, "step": 3977 }, { "epoch": 0.8156653680541316, "grad_norm": 1.2395942211151123, "learning_rate": 1.730314402624239e-06, "loss": 0.8966, "step": 3978 }, { "epoch": 0.8158704121386098, "grad_norm": 1.2871230840682983, "learning_rate": 1.7265818618087071e-06, "loss": 0.9027, "step": 3979 }, { "epoch": 0.816075456223088, "grad_norm": 1.2187061309814453, "learning_rate": 1.7228529707373364e-06, "loss": 0.8236, "step": 3980 }, { "epoch": 0.8162805003075662, "grad_norm": 1.2862943410873413, "learning_rate": 1.7191277310550869e-06, "loss": 0.8714, "step": 3981 }, { "epoch": 0.8164855443920442, "grad_norm": 1.2695519924163818, "learning_rate": 1.7154061444053239e-06, "loss": 0.9282, "step": 3982 }, { "epoch": 0.8166905884765224, "grad_norm": 1.117562174797058, "learning_rate": 1.7116882124297795e-06, "loss": 0.8009, "step": 3983 }, { "epoch": 0.8168956325610006, "grad_norm": 1.23762047290802, "learning_rate": 1.7079739367685965e-06, "loss": 0.9273, "step": 3984 }, { "epoch": 0.8171006766454788, "grad_norm": 1.1106332540512085, "learning_rate": 1.7042633190602875e-06, "loss": 0.8712, "step": 3985 }, { "epoch": 0.8173057207299569, "grad_norm": 1.2189207077026367, "learning_rate": 1.7005563609417653e-06, "loss": 0.9589, "step": 3986 }, { "epoch": 0.8175107648144351, "grad_norm": 1.1975805759429932, "learning_rate": 1.6968530640483126e-06, "loss": 0.905, "step": 3987 }, { "epoch": 0.8177158088989133, "grad_norm": 1.2998991012573242, "learning_rate": 1.6931534300136133e-06, "loss": 0.8594, "step": 3988 }, { "epoch": 0.8179208529833915, "grad_norm": 1.2991150617599487, "learning_rate": 1.6894574604697212e-06, "loss": 0.9361, "step": 3989 }, { "epoch": 0.8181258970678696, "grad_norm": 1.240891456604004, "learning_rate": 1.6857651570470857e-06, "loss": 0.8594, "step": 3990 }, { "epoch": 0.8183309411523477, "grad_norm": 1.2363171577453613, "learning_rate": 1.6820765213745305e-06, "loss": 0.9448, "step": 3991 }, { "epoch": 0.8185359852368259, "grad_norm": 1.0752559900283813, "learning_rate": 1.6783915550792652e-06, "loss": 0.7424, "step": 3992 }, { "epoch": 0.8187410293213041, "grad_norm": 1.2728124856948853, "learning_rate": 1.674710259786879e-06, "loss": 0.8588, "step": 3993 }, { "epoch": 0.8189460734057823, "grad_norm": 1.2804445028305054, "learning_rate": 1.6710326371213393e-06, "loss": 0.8731, "step": 3994 }, { "epoch": 0.8191511174902604, "grad_norm": 1.2183880805969238, "learning_rate": 1.667358688705002e-06, "loss": 0.8811, "step": 3995 }, { "epoch": 0.8193561615747386, "grad_norm": 1.1679332256317139, "learning_rate": 1.6636884161585931e-06, "loss": 0.896, "step": 3996 }, { "epoch": 0.8195612056592168, "grad_norm": 1.1879589557647705, "learning_rate": 1.660021821101222e-06, "loss": 0.9126, "step": 3997 }, { "epoch": 0.8197662497436949, "grad_norm": 1.3067885637283325, "learning_rate": 1.6563589051503704e-06, "loss": 0.8806, "step": 3998 }, { "epoch": 0.819971293828173, "grad_norm": 1.2978014945983887, "learning_rate": 1.6526996699219067e-06, "loss": 0.9032, "step": 3999 }, { "epoch": 0.8201763379126512, "grad_norm": 1.1998484134674072, "learning_rate": 1.6490441170300653e-06, "loss": 0.8903, "step": 4000 }, { "epoch": 0.8203813819971294, "grad_norm": 1.1780848503112793, "learning_rate": 1.645392248087463e-06, "loss": 0.9025, "step": 4001 }, { "epoch": 0.8205864260816076, "grad_norm": 1.3407187461853027, "learning_rate": 1.6417440647050853e-06, "loss": 0.9047, "step": 4002 }, { "epoch": 0.8207914701660857, "grad_norm": 1.274344563484192, "learning_rate": 1.6380995684922995e-06, "loss": 0.8638, "step": 4003 }, { "epoch": 0.8209965142505639, "grad_norm": 1.1927255392074585, "learning_rate": 1.6344587610568418e-06, "loss": 0.8331, "step": 4004 }, { "epoch": 0.821201558335042, "grad_norm": 1.3087533712387085, "learning_rate": 1.6308216440048208e-06, "loss": 0.9256, "step": 4005 }, { "epoch": 0.8214066024195202, "grad_norm": 1.2350380420684814, "learning_rate": 1.6271882189407162e-06, "loss": 0.8526, "step": 4006 }, { "epoch": 0.8216116465039983, "grad_norm": 1.187748670578003, "learning_rate": 1.6235584874673848e-06, "loss": 0.8662, "step": 4007 }, { "epoch": 0.8218166905884765, "grad_norm": 1.1847833395004272, "learning_rate": 1.619932451186048e-06, "loss": 0.8824, "step": 4008 }, { "epoch": 0.8220217346729547, "grad_norm": 1.2814667224884033, "learning_rate": 1.6163101116962999e-06, "loss": 0.9249, "step": 4009 }, { "epoch": 0.8222267787574329, "grad_norm": 1.3886440992355347, "learning_rate": 1.6126914705961016e-06, "loss": 0.8407, "step": 4010 }, { "epoch": 0.822431822841911, "grad_norm": 1.240107536315918, "learning_rate": 1.6090765294817833e-06, "loss": 0.9034, "step": 4011 }, { "epoch": 0.8226368669263892, "grad_norm": 1.2321717739105225, "learning_rate": 1.6054652899480472e-06, "loss": 0.8286, "step": 4012 }, { "epoch": 0.8228419110108673, "grad_norm": 1.222705364227295, "learning_rate": 1.6018577535879586e-06, "loss": 0.9328, "step": 4013 }, { "epoch": 0.8230469550953455, "grad_norm": 1.2385854721069336, "learning_rate": 1.5982539219929482e-06, "loss": 0.9303, "step": 4014 }, { "epoch": 0.8232519991798236, "grad_norm": 1.2209371328353882, "learning_rate": 1.594653796752812e-06, "loss": 0.842, "step": 4015 }, { "epoch": 0.8234570432643018, "grad_norm": 1.2245814800262451, "learning_rate": 1.591057379455717e-06, "loss": 0.9156, "step": 4016 }, { "epoch": 0.82366208734878, "grad_norm": 1.2398090362548828, "learning_rate": 1.587464671688187e-06, "loss": 0.9047, "step": 4017 }, { "epoch": 0.8238671314332582, "grad_norm": 1.2484833002090454, "learning_rate": 1.5838756750351192e-06, "loss": 0.8584, "step": 4018 }, { "epoch": 0.8240721755177363, "grad_norm": 1.3337817192077637, "learning_rate": 1.5802903910797584e-06, "loss": 0.9067, "step": 4019 }, { "epoch": 0.8242772196022145, "grad_norm": 1.386606216430664, "learning_rate": 1.5767088214037275e-06, "loss": 0.8713, "step": 4020 }, { "epoch": 0.8244822636866926, "grad_norm": 1.3328388929367065, "learning_rate": 1.5731309675869977e-06, "loss": 0.8297, "step": 4021 }, { "epoch": 0.8246873077711708, "grad_norm": 1.1599317789077759, "learning_rate": 1.5695568312079156e-06, "loss": 0.8825, "step": 4022 }, { "epoch": 0.824892351855649, "grad_norm": 1.2363461256027222, "learning_rate": 1.565986413843169e-06, "loss": 0.8077, "step": 4023 }, { "epoch": 0.8250973959401271, "grad_norm": 1.340226411819458, "learning_rate": 1.5624197170678235e-06, "loss": 0.9185, "step": 4024 }, { "epoch": 0.8253024400246053, "grad_norm": 1.247788906097412, "learning_rate": 1.5588567424552925e-06, "loss": 0.8884, "step": 4025 }, { "epoch": 0.8255074841090835, "grad_norm": 1.300889015197754, "learning_rate": 1.5552974915773477e-06, "loss": 0.8931, "step": 4026 }, { "epoch": 0.8257125281935617, "grad_norm": 1.1413649320602417, "learning_rate": 1.5517419660041277e-06, "loss": 0.8029, "step": 4027 }, { "epoch": 0.8259175722780397, "grad_norm": 1.2377365827560425, "learning_rate": 1.5481901673041122e-06, "loss": 0.9201, "step": 4028 }, { "epoch": 0.8261226163625179, "grad_norm": 1.1981689929962158, "learning_rate": 1.544642097044151e-06, "loss": 0.8214, "step": 4029 }, { "epoch": 0.8263276604469961, "grad_norm": 1.210386037826538, "learning_rate": 1.5410977567894403e-06, "loss": 0.8383, "step": 4030 }, { "epoch": 0.8265327045314743, "grad_norm": 1.256076693534851, "learning_rate": 1.5375571481035401e-06, "loss": 0.8901, "step": 4031 }, { "epoch": 0.8267377486159524, "grad_norm": 1.2933549880981445, "learning_rate": 1.534020272548349e-06, "loss": 0.9085, "step": 4032 }, { "epoch": 0.8269427927004306, "grad_norm": 1.3081578016281128, "learning_rate": 1.5304871316841352e-06, "loss": 0.8626, "step": 4033 }, { "epoch": 0.8271478367849088, "grad_norm": 1.2164711952209473, "learning_rate": 1.526957727069507e-06, "loss": 0.859, "step": 4034 }, { "epoch": 0.827352880869387, "grad_norm": 1.2152389287948608, "learning_rate": 1.523432060261434e-06, "loss": 0.8553, "step": 4035 }, { "epoch": 0.827557924953865, "grad_norm": 1.2553901672363281, "learning_rate": 1.51991013281523e-06, "loss": 0.8888, "step": 4036 }, { "epoch": 0.8277629690383432, "grad_norm": 1.2646044492721558, "learning_rate": 1.5163919462845622e-06, "loss": 0.8509, "step": 4037 }, { "epoch": 0.8279680131228214, "grad_norm": 1.3401950597763062, "learning_rate": 1.5128775022214448e-06, "loss": 0.8673, "step": 4038 }, { "epoch": 0.8281730572072996, "grad_norm": 1.2049651145935059, "learning_rate": 1.5093668021762476e-06, "loss": 0.9335, "step": 4039 }, { "epoch": 0.8283781012917777, "grad_norm": 1.194352149963379, "learning_rate": 1.5058598476976826e-06, "loss": 0.8275, "step": 4040 }, { "epoch": 0.8285831453762559, "grad_norm": 1.213608980178833, "learning_rate": 1.5023566403328105e-06, "loss": 0.922, "step": 4041 }, { "epoch": 0.8287881894607341, "grad_norm": 1.5010591745376587, "learning_rate": 1.4988571816270402e-06, "loss": 0.9124, "step": 4042 }, { "epoch": 0.8289932335452123, "grad_norm": 1.1885143518447876, "learning_rate": 1.495361473124125e-06, "loss": 0.8621, "step": 4043 }, { "epoch": 0.8291982776296903, "grad_norm": 1.3671739101409912, "learning_rate": 1.491869516366168e-06, "loss": 0.9421, "step": 4044 }, { "epoch": 0.8294033217141685, "grad_norm": 1.2060714960098267, "learning_rate": 1.4883813128936142e-06, "loss": 0.8431, "step": 4045 }, { "epoch": 0.8296083657986467, "grad_norm": 1.3095574378967285, "learning_rate": 1.484896864245252e-06, "loss": 0.8395, "step": 4046 }, { "epoch": 0.8298134098831249, "grad_norm": 1.2671477794647217, "learning_rate": 1.4814161719582132e-06, "loss": 0.9365, "step": 4047 }, { "epoch": 0.830018453967603, "grad_norm": 1.2768391370773315, "learning_rate": 1.477939237567978e-06, "loss": 0.8486, "step": 4048 }, { "epoch": 0.8302234980520812, "grad_norm": 1.1022213697433472, "learning_rate": 1.4744660626083619e-06, "loss": 0.8548, "step": 4049 }, { "epoch": 0.8304285421365594, "grad_norm": 1.3399255275726318, "learning_rate": 1.4709966486115268e-06, "loss": 0.8412, "step": 4050 }, { "epoch": 0.8306335862210376, "grad_norm": 1.4290012121200562, "learning_rate": 1.4675309971079688e-06, "loss": 0.8815, "step": 4051 }, { "epoch": 0.8308386303055156, "grad_norm": 1.306899905204773, "learning_rate": 1.4640691096265358e-06, "loss": 0.9235, "step": 4052 }, { "epoch": 0.8310436743899938, "grad_norm": 1.2671254873275757, "learning_rate": 1.460610987694403e-06, "loss": 0.8815, "step": 4053 }, { "epoch": 0.831248718474472, "grad_norm": 1.4115947484970093, "learning_rate": 1.4571566328370956e-06, "loss": 0.9627, "step": 4054 }, { "epoch": 0.8314537625589502, "grad_norm": 1.2871606349945068, "learning_rate": 1.4537060465784636e-06, "loss": 0.8477, "step": 4055 }, { "epoch": 0.8316588066434283, "grad_norm": 1.2052627801895142, "learning_rate": 1.4502592304407094e-06, "loss": 0.886, "step": 4056 }, { "epoch": 0.8318638507279065, "grad_norm": 1.3413012027740479, "learning_rate": 1.4468161859443609e-06, "loss": 0.7803, "step": 4057 }, { "epoch": 0.8320688948123847, "grad_norm": 1.1249371767044067, "learning_rate": 1.4433769146082865e-06, "loss": 0.9079, "step": 4058 }, { "epoch": 0.8322739388968629, "grad_norm": 1.3647745847702026, "learning_rate": 1.4399414179496918e-06, "loss": 0.8213, "step": 4059 }, { "epoch": 0.832478982981341, "grad_norm": 1.3139617443084717, "learning_rate": 1.436509697484111e-06, "loss": 0.8561, "step": 4060 }, { "epoch": 0.8326840270658191, "grad_norm": 1.2319526672363281, "learning_rate": 1.4330817547254216e-06, "loss": 0.9252, "step": 4061 }, { "epoch": 0.8328890711502973, "grad_norm": 1.259634017944336, "learning_rate": 1.4296575911858268e-06, "loss": 0.8479, "step": 4062 }, { "epoch": 0.8330941152347755, "grad_norm": 1.26174795627594, "learning_rate": 1.4262372083758714e-06, "loss": 0.8056, "step": 4063 }, { "epoch": 0.8332991593192537, "grad_norm": 1.3207353353500366, "learning_rate": 1.422820607804417e-06, "loss": 0.854, "step": 4064 }, { "epoch": 0.8335042034037318, "grad_norm": 1.1811227798461914, "learning_rate": 1.4194077909786729e-06, "loss": 0.8627, "step": 4065 }, { "epoch": 0.83370924748821, "grad_norm": 1.2460744380950928, "learning_rate": 1.4159987594041702e-06, "loss": 0.9124, "step": 4066 }, { "epoch": 0.8339142915726881, "grad_norm": 1.2954713106155396, "learning_rate": 1.412593514584777e-06, "loss": 0.8802, "step": 4067 }, { "epoch": 0.8341193356571663, "grad_norm": 1.1969743967056274, "learning_rate": 1.4091920580226793e-06, "loss": 0.8469, "step": 4068 }, { "epoch": 0.8343243797416444, "grad_norm": 1.3453636169433594, "learning_rate": 1.4057943912184035e-06, "loss": 0.9523, "step": 4069 }, { "epoch": 0.8345294238261226, "grad_norm": 1.287597894668579, "learning_rate": 1.4024005156707976e-06, "loss": 0.8828, "step": 4070 }, { "epoch": 0.8347344679106008, "grad_norm": 1.1754636764526367, "learning_rate": 1.3990104328770427e-06, "loss": 0.927, "step": 4071 }, { "epoch": 0.834939511995079, "grad_norm": 1.287320613861084, "learning_rate": 1.3956241443326423e-06, "loss": 0.8513, "step": 4072 }, { "epoch": 0.8351445560795571, "grad_norm": 1.336613655090332, "learning_rate": 1.3922416515314253e-06, "loss": 0.8587, "step": 4073 }, { "epoch": 0.8353496001640353, "grad_norm": 1.3687167167663574, "learning_rate": 1.3888629559655497e-06, "loss": 0.8552, "step": 4074 }, { "epoch": 0.8355546442485134, "grad_norm": 1.373639702796936, "learning_rate": 1.3854880591254938e-06, "loss": 0.9363, "step": 4075 }, { "epoch": 0.8357596883329916, "grad_norm": 1.2125314474105835, "learning_rate": 1.3821169625000709e-06, "loss": 0.8422, "step": 4076 }, { "epoch": 0.8359647324174697, "grad_norm": 1.2802715301513672, "learning_rate": 1.378749667576399e-06, "loss": 0.8309, "step": 4077 }, { "epoch": 0.8361697765019479, "grad_norm": 1.272782564163208, "learning_rate": 1.3753861758399366e-06, "loss": 0.8888, "step": 4078 }, { "epoch": 0.8363748205864261, "grad_norm": 1.2202848196029663, "learning_rate": 1.372026488774456e-06, "loss": 0.8435, "step": 4079 }, { "epoch": 0.8365798646709043, "grad_norm": 1.2978111505508423, "learning_rate": 1.368670607862056e-06, "loss": 0.9281, "step": 4080 }, { "epoch": 0.8367849087553824, "grad_norm": 1.1819266080856323, "learning_rate": 1.365318534583151e-06, "loss": 0.8133, "step": 4081 }, { "epoch": 0.8369899528398606, "grad_norm": 1.1829369068145752, "learning_rate": 1.3619702704164783e-06, "loss": 0.9438, "step": 4082 }, { "epoch": 0.8371949969243387, "grad_norm": 1.2396656274795532, "learning_rate": 1.3586258168390931e-06, "loss": 0.912, "step": 4083 }, { "epoch": 0.8374000410088169, "grad_norm": 1.2551404237747192, "learning_rate": 1.3552851753263729e-06, "loss": 0.8864, "step": 4084 }, { "epoch": 0.837605085093295, "grad_norm": 1.1906306743621826, "learning_rate": 1.3519483473520124e-06, "loss": 0.8016, "step": 4085 }, { "epoch": 0.8378101291777732, "grad_norm": 1.4141696691513062, "learning_rate": 1.3486153343880226e-06, "loss": 0.8441, "step": 4086 }, { "epoch": 0.8380151732622514, "grad_norm": 1.2851375341415405, "learning_rate": 1.3452861379047289e-06, "loss": 0.8616, "step": 4087 }, { "epoch": 0.8382202173467296, "grad_norm": 1.2812241315841675, "learning_rate": 1.3419607593707818e-06, "loss": 0.8294, "step": 4088 }, { "epoch": 0.8384252614312077, "grad_norm": 1.307580828666687, "learning_rate": 1.3386392002531401e-06, "loss": 0.9215, "step": 4089 }, { "epoch": 0.8386303055156858, "grad_norm": 1.2942497730255127, "learning_rate": 1.3353214620170797e-06, "loss": 0.8467, "step": 4090 }, { "epoch": 0.838835349600164, "grad_norm": 1.2542918920516968, "learning_rate": 1.3320075461261905e-06, "loss": 0.9044, "step": 4091 }, { "epoch": 0.8390403936846422, "grad_norm": 1.2316782474517822, "learning_rate": 1.3286974540423747e-06, "loss": 0.906, "step": 4092 }, { "epoch": 0.8392454377691203, "grad_norm": 1.260572075843811, "learning_rate": 1.325391187225854e-06, "loss": 0.9179, "step": 4093 }, { "epoch": 0.8394504818535985, "grad_norm": 1.265196442604065, "learning_rate": 1.3220887471351574e-06, "loss": 0.943, "step": 4094 }, { "epoch": 0.8396555259380767, "grad_norm": 1.1882331371307373, "learning_rate": 1.3187901352271248e-06, "loss": 0.877, "step": 4095 }, { "epoch": 0.8398605700225549, "grad_norm": 1.1856833696365356, "learning_rate": 1.3154953529569092e-06, "loss": 0.8513, "step": 4096 }, { "epoch": 0.8400656141070331, "grad_norm": 1.2890785932540894, "learning_rate": 1.3122044017779768e-06, "loss": 0.8785, "step": 4097 }, { "epoch": 0.8402706581915111, "grad_norm": 1.3235124349594116, "learning_rate": 1.3089172831420972e-06, "loss": 0.892, "step": 4098 }, { "epoch": 0.8404757022759893, "grad_norm": 1.2931123971939087, "learning_rate": 1.3056339984993604e-06, "loss": 0.8768, "step": 4099 }, { "epoch": 0.8406807463604675, "grad_norm": 1.3058477640151978, "learning_rate": 1.3023545492981482e-06, "loss": 0.8342, "step": 4100 }, { "epoch": 0.8408857904449457, "grad_norm": 1.3345284461975098, "learning_rate": 1.2990789369851686e-06, "loss": 0.8541, "step": 4101 }, { "epoch": 0.8410908345294238, "grad_norm": 1.1561685800552368, "learning_rate": 1.2958071630054214e-06, "loss": 0.8524, "step": 4102 }, { "epoch": 0.841295878613902, "grad_norm": 1.3505942821502686, "learning_rate": 1.2925392288022299e-06, "loss": 0.862, "step": 4103 }, { "epoch": 0.8415009226983802, "grad_norm": 1.3549253940582275, "learning_rate": 1.2892751358172028e-06, "loss": 0.958, "step": 4104 }, { "epoch": 0.8417059667828584, "grad_norm": 1.3876996040344238, "learning_rate": 1.2860148854902743e-06, "loss": 0.8894, "step": 4105 }, { "epoch": 0.8419110108673364, "grad_norm": 1.396362543106079, "learning_rate": 1.2827584792596703e-06, "loss": 0.88, "step": 4106 }, { "epoch": 0.8421160549518146, "grad_norm": 1.2620669603347778, "learning_rate": 1.279505918561923e-06, "loss": 0.8709, "step": 4107 }, { "epoch": 0.8423210990362928, "grad_norm": 1.2195965051651, "learning_rate": 1.2762572048318788e-06, "loss": 0.9472, "step": 4108 }, { "epoch": 0.842526143120771, "grad_norm": 1.3663246631622314, "learning_rate": 1.2730123395026684e-06, "loss": 0.8391, "step": 4109 }, { "epoch": 0.8427311872052491, "grad_norm": 1.2699573040008545, "learning_rate": 1.2697713240057419e-06, "loss": 0.8456, "step": 4110 }, { "epoch": 0.8429362312897273, "grad_norm": 1.3806263208389282, "learning_rate": 1.2665341597708402e-06, "loss": 0.9123, "step": 4111 }, { "epoch": 0.8431412753742055, "grad_norm": 1.1763023138046265, "learning_rate": 1.2633008482260146e-06, "loss": 0.8905, "step": 4112 }, { "epoch": 0.8433463194586837, "grad_norm": 1.2975375652313232, "learning_rate": 1.2600713907976048e-06, "loss": 0.9156, "step": 4113 }, { "epoch": 0.8435513635431617, "grad_norm": 1.2622041702270508, "learning_rate": 1.2568457889102615e-06, "loss": 0.8619, "step": 4114 }, { "epoch": 0.8437564076276399, "grad_norm": 1.2730739116668701, "learning_rate": 1.2536240439869262e-06, "loss": 0.8871, "step": 4115 }, { "epoch": 0.8439614517121181, "grad_norm": 1.239452838897705, "learning_rate": 1.250406157448848e-06, "loss": 0.8915, "step": 4116 }, { "epoch": 0.8441664957965963, "grad_norm": 1.375857949256897, "learning_rate": 1.2471921307155655e-06, "loss": 0.878, "step": 4117 }, { "epoch": 0.8443715398810744, "grad_norm": 1.2667334079742432, "learning_rate": 1.2439819652049178e-06, "loss": 0.8768, "step": 4118 }, { "epoch": 0.8445765839655526, "grad_norm": 1.2399952411651611, "learning_rate": 1.2407756623330392e-06, "loss": 0.9414, "step": 4119 }, { "epoch": 0.8447816280500308, "grad_norm": 1.3897346258163452, "learning_rate": 1.2375732235143645e-06, "loss": 0.9638, "step": 4120 }, { "epoch": 0.844986672134509, "grad_norm": 1.4375869035720825, "learning_rate": 1.2343746501616206e-06, "loss": 0.8914, "step": 4121 }, { "epoch": 0.845191716218987, "grad_norm": 1.241875171661377, "learning_rate": 1.2311799436858275e-06, "loss": 0.8223, "step": 4122 }, { "epoch": 0.8453967603034652, "grad_norm": 1.3495230674743652, "learning_rate": 1.2279891054963023e-06, "loss": 0.9251, "step": 4123 }, { "epoch": 0.8456018043879434, "grad_norm": 1.2160569429397583, "learning_rate": 1.2248021370006524e-06, "loss": 0.9804, "step": 4124 }, { "epoch": 0.8458068484724216, "grad_norm": 1.2511911392211914, "learning_rate": 1.2216190396047845e-06, "loss": 0.8215, "step": 4125 }, { "epoch": 0.8460118925568997, "grad_norm": 1.1874637603759766, "learning_rate": 1.2184398147128918e-06, "loss": 0.8831, "step": 4126 }, { "epoch": 0.8462169366413779, "grad_norm": 1.2532769441604614, "learning_rate": 1.2152644637274603e-06, "loss": 0.9258, "step": 4127 }, { "epoch": 0.8464219807258561, "grad_norm": 1.2535722255706787, "learning_rate": 1.212092988049265e-06, "loss": 0.8197, "step": 4128 }, { "epoch": 0.8466270248103342, "grad_norm": 1.326887607574463, "learning_rate": 1.2089253890773789e-06, "loss": 0.8667, "step": 4129 }, { "epoch": 0.8468320688948124, "grad_norm": 1.1576809883117676, "learning_rate": 1.2057616682091556e-06, "loss": 0.8254, "step": 4130 }, { "epoch": 0.8470371129792905, "grad_norm": 1.1691808700561523, "learning_rate": 1.2026018268402474e-06, "loss": 0.8688, "step": 4131 }, { "epoch": 0.8472421570637687, "grad_norm": 1.2427388429641724, "learning_rate": 1.1994458663645836e-06, "loss": 0.9882, "step": 4132 }, { "epoch": 0.8474472011482469, "grad_norm": 1.2679375410079956, "learning_rate": 1.1962937881743918e-06, "loss": 0.9424, "step": 4133 }, { "epoch": 0.8476522452327251, "grad_norm": 1.2207263708114624, "learning_rate": 1.193145593660181e-06, "loss": 0.8968, "step": 4134 }, { "epoch": 0.8478572893172032, "grad_norm": 1.318344235420227, "learning_rate": 1.1900012842107545e-06, "loss": 0.8922, "step": 4135 }, { "epoch": 0.8480623334016814, "grad_norm": 1.275299310684204, "learning_rate": 1.1868608612131872e-06, "loss": 0.9087, "step": 4136 }, { "epoch": 0.8482673774861595, "grad_norm": 1.3086864948272705, "learning_rate": 1.1837243260528542e-06, "loss": 0.8747, "step": 4137 }, { "epoch": 0.8484724215706377, "grad_norm": 1.3679553270339966, "learning_rate": 1.18059168011341e-06, "loss": 0.916, "step": 4138 }, { "epoch": 0.8486774656551158, "grad_norm": 1.2683444023132324, "learning_rate": 1.1774629247767889e-06, "loss": 0.8842, "step": 4139 }, { "epoch": 0.848882509739594, "grad_norm": 1.3405098915100098, "learning_rate": 1.1743380614232213e-06, "loss": 0.9096, "step": 4140 }, { "epoch": 0.8490875538240722, "grad_norm": 1.2751340866088867, "learning_rate": 1.1712170914312026e-06, "loss": 0.8836, "step": 4141 }, { "epoch": 0.8492925979085504, "grad_norm": 1.1999011039733887, "learning_rate": 1.168100016177528e-06, "loss": 0.9041, "step": 4142 }, { "epoch": 0.8494976419930285, "grad_norm": 1.2468488216400146, "learning_rate": 1.164986837037264e-06, "loss": 0.9454, "step": 4143 }, { "epoch": 0.8497026860775067, "grad_norm": 1.2933865785598755, "learning_rate": 1.1618775553837657e-06, "loss": 0.8609, "step": 4144 }, { "epoch": 0.8499077301619848, "grad_norm": 1.3986374139785767, "learning_rate": 1.1587721725886581e-06, "loss": 0.9163, "step": 4145 }, { "epoch": 0.850112774246463, "grad_norm": 1.1792938709259033, "learning_rate": 1.1556706900218572e-06, "loss": 0.8593, "step": 4146 }, { "epoch": 0.8503178183309411, "grad_norm": 1.2954466342926025, "learning_rate": 1.1525731090515536e-06, "loss": 0.9177, "step": 4147 }, { "epoch": 0.8505228624154193, "grad_norm": 1.3709971904754639, "learning_rate": 1.149479431044218e-06, "loss": 0.8987, "step": 4148 }, { "epoch": 0.8507279064998975, "grad_norm": 1.237723708152771, "learning_rate": 1.1463896573645984e-06, "loss": 0.9078, "step": 4149 }, { "epoch": 0.8509329505843757, "grad_norm": 1.3088436126708984, "learning_rate": 1.1433037893757203e-06, "loss": 0.8919, "step": 4150 }, { "epoch": 0.8511379946688538, "grad_norm": 1.3525944948196411, "learning_rate": 1.1402218284388845e-06, "loss": 0.9132, "step": 4151 }, { "epoch": 0.851343038753332, "grad_norm": 1.259734869003296, "learning_rate": 1.137143775913675e-06, "loss": 0.8608, "step": 4152 }, { "epoch": 0.8515480828378101, "grad_norm": 1.343284010887146, "learning_rate": 1.1340696331579436e-06, "loss": 0.907, "step": 4153 }, { "epoch": 0.8517531269222883, "grad_norm": 1.2914953231811523, "learning_rate": 1.1309994015278225e-06, "loss": 0.888, "step": 4154 }, { "epoch": 0.8519581710067664, "grad_norm": 1.187215805053711, "learning_rate": 1.1279330823777167e-06, "loss": 0.9094, "step": 4155 }, { "epoch": 0.8521632150912446, "grad_norm": 1.372149109840393, "learning_rate": 1.1248706770603025e-06, "loss": 0.8862, "step": 4156 }, { "epoch": 0.8523682591757228, "grad_norm": 1.2631285190582275, "learning_rate": 1.1218121869265365e-06, "loss": 0.9242, "step": 4157 }, { "epoch": 0.852573303260201, "grad_norm": 1.220145344734192, "learning_rate": 1.1187576133256427e-06, "loss": 0.8796, "step": 4158 }, { "epoch": 0.852778347344679, "grad_norm": 1.227832555770874, "learning_rate": 1.1157069576051183e-06, "loss": 0.8463, "step": 4159 }, { "epoch": 0.8529833914291572, "grad_norm": 1.2914561033248901, "learning_rate": 1.1126602211107317e-06, "loss": 0.912, "step": 4160 }, { "epoch": 0.8531884355136354, "grad_norm": 1.3197684288024902, "learning_rate": 1.109617405186526e-06, "loss": 0.8608, "step": 4161 }, { "epoch": 0.8533934795981136, "grad_norm": 1.3027318716049194, "learning_rate": 1.1065785111748117e-06, "loss": 0.8191, "step": 4162 }, { "epoch": 0.8535985236825917, "grad_norm": 1.2602895498275757, "learning_rate": 1.1035435404161688e-06, "loss": 0.8033, "step": 4163 }, { "epoch": 0.8538035677670699, "grad_norm": 1.2455930709838867, "learning_rate": 1.100512494249445e-06, "loss": 0.825, "step": 4164 }, { "epoch": 0.8540086118515481, "grad_norm": 1.1843311786651611, "learning_rate": 1.097485374011763e-06, "loss": 0.8839, "step": 4165 }, { "epoch": 0.8542136559360263, "grad_norm": 1.2569806575775146, "learning_rate": 1.094462181038508e-06, "loss": 0.8898, "step": 4166 }, { "epoch": 0.8544187000205045, "grad_norm": 1.2133638858795166, "learning_rate": 1.0914429166633355e-06, "loss": 0.8434, "step": 4167 }, { "epoch": 0.8546237441049825, "grad_norm": 1.281834363937378, "learning_rate": 1.0884275822181634e-06, "loss": 0.8446, "step": 4168 }, { "epoch": 0.8548287881894607, "grad_norm": 1.2682753801345825, "learning_rate": 1.0854161790331852e-06, "loss": 0.8535, "step": 4169 }, { "epoch": 0.8550338322739389, "grad_norm": 1.2703676223754883, "learning_rate": 1.0824087084368494e-06, "loss": 0.9445, "step": 4170 }, { "epoch": 0.8552388763584171, "grad_norm": 1.3848639726638794, "learning_rate": 1.0794051717558784e-06, "loss": 0.8801, "step": 4171 }, { "epoch": 0.8554439204428952, "grad_norm": 1.3124866485595703, "learning_rate": 1.076405570315252e-06, "loss": 0.8817, "step": 4172 }, { "epoch": 0.8556489645273734, "grad_norm": 1.1826820373535156, "learning_rate": 1.0734099054382186e-06, "loss": 0.9022, "step": 4173 }, { "epoch": 0.8558540086118516, "grad_norm": 1.2880465984344482, "learning_rate": 1.0704181784462896e-06, "loss": 0.8814, "step": 4174 }, { "epoch": 0.8560590526963298, "grad_norm": 1.2154546976089478, "learning_rate": 1.0674303906592375e-06, "loss": 0.856, "step": 4175 }, { "epoch": 0.8562640967808078, "grad_norm": 1.239269495010376, "learning_rate": 1.0644465433951024e-06, "loss": 0.9115, "step": 4176 }, { "epoch": 0.856469140865286, "grad_norm": 1.3904800415039062, "learning_rate": 1.0614666379701732e-06, "loss": 0.9634, "step": 4177 }, { "epoch": 0.8566741849497642, "grad_norm": 1.298921823501587, "learning_rate": 1.0584906756990165e-06, "loss": 0.8553, "step": 4178 }, { "epoch": 0.8568792290342424, "grad_norm": 1.2687633037567139, "learning_rate": 1.0555186578944453e-06, "loss": 0.8984, "step": 4179 }, { "epoch": 0.8570842731187205, "grad_norm": 1.1381899118423462, "learning_rate": 1.0525505858675467e-06, "loss": 0.7983, "step": 4180 }, { "epoch": 0.8572893172031987, "grad_norm": 1.2748578786849976, "learning_rate": 1.0495864609276486e-06, "loss": 0.8758, "step": 4181 }, { "epoch": 0.8574943612876769, "grad_norm": 1.1968029737472534, "learning_rate": 1.046626284382356e-06, "loss": 0.8989, "step": 4182 }, { "epoch": 0.857699405372155, "grad_norm": 1.2639150619506836, "learning_rate": 1.0436700575375192e-06, "loss": 0.9092, "step": 4183 }, { "epoch": 0.8579044494566331, "grad_norm": 1.2414230108261108, "learning_rate": 1.0407177816972558e-06, "loss": 0.8673, "step": 4184 }, { "epoch": 0.8581094935411113, "grad_norm": 1.2082312107086182, "learning_rate": 1.037769458163933e-06, "loss": 0.9127, "step": 4185 }, { "epoch": 0.8583145376255895, "grad_norm": 1.264542579650879, "learning_rate": 1.0348250882381782e-06, "loss": 0.8841, "step": 4186 }, { "epoch": 0.8585195817100677, "grad_norm": 1.3492646217346191, "learning_rate": 1.0318846732188737e-06, "loss": 0.8216, "step": 4187 }, { "epoch": 0.8587246257945458, "grad_norm": 1.2120202779769897, "learning_rate": 1.0289482144031538e-06, "loss": 0.8441, "step": 4188 }, { "epoch": 0.858929669879024, "grad_norm": 1.1711379289627075, "learning_rate": 1.0260157130864178e-06, "loss": 0.8605, "step": 4189 }, { "epoch": 0.8591347139635022, "grad_norm": 1.3153507709503174, "learning_rate": 1.0230871705623058e-06, "loss": 0.9068, "step": 4190 }, { "epoch": 0.8593397580479804, "grad_norm": 1.1947953701019287, "learning_rate": 1.0201625881227217e-06, "loss": 0.8631, "step": 4191 }, { "epoch": 0.8595448021324584, "grad_norm": 1.2075085639953613, "learning_rate": 1.017241967057816e-06, "loss": 0.9048, "step": 4192 }, { "epoch": 0.8597498462169366, "grad_norm": 1.2573373317718506, "learning_rate": 1.0143253086559979e-06, "loss": 0.8692, "step": 4193 }, { "epoch": 0.8599548903014148, "grad_norm": 1.3307644128799438, "learning_rate": 1.0114126142039238e-06, "loss": 0.8989, "step": 4194 }, { "epoch": 0.860159934385893, "grad_norm": 1.247575044631958, "learning_rate": 1.0085038849865025e-06, "loss": 0.862, "step": 4195 }, { "epoch": 0.8603649784703711, "grad_norm": 1.343164324760437, "learning_rate": 1.0055991222868921e-06, "loss": 0.9263, "step": 4196 }, { "epoch": 0.8605700225548493, "grad_norm": 1.2663766145706177, "learning_rate": 1.0026983273865055e-06, "loss": 0.9075, "step": 4197 }, { "epoch": 0.8607750666393275, "grad_norm": 1.2961190938949585, "learning_rate": 9.99801501565002e-07, "loss": 0.9309, "step": 4198 }, { "epoch": 0.8609801107238056, "grad_norm": 1.1873713731765747, "learning_rate": 9.969086461002887e-07, "loss": 0.8034, "step": 4199 }, { "epoch": 0.8611851548082837, "grad_norm": 1.3740674257278442, "learning_rate": 9.940197622685211e-07, "loss": 0.9466, "step": 4200 }, { "epoch": 0.8613901988927619, "grad_norm": 1.2380897998809814, "learning_rate": 9.911348513441089e-07, "loss": 0.8857, "step": 4201 }, { "epoch": 0.8615952429772401, "grad_norm": 1.2720545530319214, "learning_rate": 9.882539145997027e-07, "loss": 0.9362, "step": 4202 }, { "epoch": 0.8618002870617183, "grad_norm": 1.2430847883224487, "learning_rate": 9.853769533062008e-07, "loss": 0.8981, "step": 4203 }, { "epoch": 0.8620053311461965, "grad_norm": 1.1922091245651245, "learning_rate": 9.82503968732751e-07, "loss": 0.8359, "step": 4204 }, { "epoch": 0.8622103752306746, "grad_norm": 1.2169052362442017, "learning_rate": 9.796349621467405e-07, "loss": 0.9256, "step": 4205 }, { "epoch": 0.8624154193151528, "grad_norm": 1.3736833333969116, "learning_rate": 9.767699348138116e-07, "loss": 0.8447, "step": 4206 }, { "epoch": 0.8626204633996309, "grad_norm": 1.2443345785140991, "learning_rate": 9.739088879978409e-07, "loss": 0.8719, "step": 4207 }, { "epoch": 0.8628255074841091, "grad_norm": 1.2422282695770264, "learning_rate": 9.710518229609566e-07, "loss": 0.8123, "step": 4208 }, { "epoch": 0.8630305515685872, "grad_norm": 1.2336243391036987, "learning_rate": 9.68198740963523e-07, "loss": 0.8616, "step": 4209 }, { "epoch": 0.8632355956530654, "grad_norm": 1.247429609298706, "learning_rate": 9.653496432641575e-07, "loss": 0.8612, "step": 4210 }, { "epoch": 0.8634406397375436, "grad_norm": 1.2092245817184448, "learning_rate": 9.62504531119708e-07, "loss": 0.8454, "step": 4211 }, { "epoch": 0.8636456838220218, "grad_norm": 1.2165449857711792, "learning_rate": 9.59663405785277e-07, "loss": 0.8545, "step": 4212 }, { "epoch": 0.8638507279064999, "grad_norm": 1.2912750244140625, "learning_rate": 9.568262685141938e-07, "loss": 0.8551, "step": 4213 }, { "epoch": 0.864055771990978, "grad_norm": 1.2448457479476929, "learning_rate": 9.53993120558041e-07, "loss": 0.9187, "step": 4214 }, { "epoch": 0.8642608160754562, "grad_norm": 1.3640508651733398, "learning_rate": 9.511639631666347e-07, "loss": 0.8187, "step": 4215 }, { "epoch": 0.8644658601599344, "grad_norm": 1.3074989318847656, "learning_rate": 9.483387975880354e-07, "loss": 0.8712, "step": 4216 }, { "epoch": 0.8646709042444125, "grad_norm": 1.1707929372787476, "learning_rate": 9.455176250685338e-07, "loss": 0.8877, "step": 4217 }, { "epoch": 0.8648759483288907, "grad_norm": 1.2708323001861572, "learning_rate": 9.427004468526702e-07, "loss": 0.8553, "step": 4218 }, { "epoch": 0.8650809924133689, "grad_norm": 1.2548812627792358, "learning_rate": 9.398872641832147e-07, "loss": 0.9157, "step": 4219 }, { "epoch": 0.8652860364978471, "grad_norm": 1.1188781261444092, "learning_rate": 9.370780783011757e-07, "loss": 0.8708, "step": 4220 }, { "epoch": 0.8654910805823252, "grad_norm": 1.1735734939575195, "learning_rate": 9.342728904458065e-07, "loss": 0.8019, "step": 4221 }, { "epoch": 0.8656961246668033, "grad_norm": 1.2406796216964722, "learning_rate": 9.314717018545838e-07, "loss": 0.8687, "step": 4222 }, { "epoch": 0.8659011687512815, "grad_norm": 1.3138947486877441, "learning_rate": 9.286745137632314e-07, "loss": 0.8844, "step": 4223 }, { "epoch": 0.8661062128357597, "grad_norm": 1.1539207696914673, "learning_rate": 9.258813274056999e-07, "loss": 0.8853, "step": 4224 }, { "epoch": 0.8663112569202378, "grad_norm": 1.4402494430541992, "learning_rate": 9.230921440141849e-07, "loss": 0.8987, "step": 4225 }, { "epoch": 0.866516301004716, "grad_norm": 1.2919374704360962, "learning_rate": 9.203069648191009e-07, "loss": 0.9426, "step": 4226 }, { "epoch": 0.8667213450891942, "grad_norm": 1.3142037391662598, "learning_rate": 9.17525791049112e-07, "loss": 0.9121, "step": 4227 }, { "epoch": 0.8669263891736724, "grad_norm": 1.2716472148895264, "learning_rate": 9.147486239311032e-07, "loss": 0.8757, "step": 4228 }, { "epoch": 0.8671314332581505, "grad_norm": 1.198325514793396, "learning_rate": 9.119754646902002e-07, "loss": 0.8251, "step": 4229 }, { "epoch": 0.8673364773426286, "grad_norm": 1.231846570968628, "learning_rate": 9.092063145497553e-07, "loss": 0.8448, "step": 4230 }, { "epoch": 0.8675415214271068, "grad_norm": 1.2352181673049927, "learning_rate": 9.064411747313562e-07, "loss": 0.8924, "step": 4231 }, { "epoch": 0.867746565511585, "grad_norm": 1.2780226469039917, "learning_rate": 9.036800464548157e-07, "loss": 0.9533, "step": 4232 }, { "epoch": 0.8679516095960631, "grad_norm": 1.2738804817199707, "learning_rate": 9.00922930938185e-07, "loss": 0.8668, "step": 4233 }, { "epoch": 0.8681566536805413, "grad_norm": 1.2267800569534302, "learning_rate": 8.981698293977392e-07, "loss": 0.8361, "step": 4234 }, { "epoch": 0.8683616977650195, "grad_norm": 1.267863154411316, "learning_rate": 8.954207430479844e-07, "loss": 0.8936, "step": 4235 }, { "epoch": 0.8685667418494977, "grad_norm": 1.487202525138855, "learning_rate": 8.926756731016551e-07, "loss": 0.8324, "step": 4236 }, { "epoch": 0.8687717859339759, "grad_norm": 1.2705984115600586, "learning_rate": 8.899346207697135e-07, "loss": 0.9454, "step": 4237 }, { "epoch": 0.8689768300184539, "grad_norm": 1.372297763824463, "learning_rate": 8.871975872613514e-07, "loss": 0.891, "step": 4238 }, { "epoch": 0.8691818741029321, "grad_norm": 1.178321361541748, "learning_rate": 8.844645737839874e-07, "loss": 0.8304, "step": 4239 }, { "epoch": 0.8693869181874103, "grad_norm": 1.2391139268875122, "learning_rate": 8.817355815432638e-07, "loss": 0.8303, "step": 4240 }, { "epoch": 0.8695919622718885, "grad_norm": 1.293843388557434, "learning_rate": 8.790106117430508e-07, "loss": 0.8574, "step": 4241 }, { "epoch": 0.8697970063563666, "grad_norm": 1.2984657287597656, "learning_rate": 8.762896655854481e-07, "loss": 0.8593, "step": 4242 }, { "epoch": 0.8700020504408448, "grad_norm": 1.2712438106536865, "learning_rate": 8.735727442707731e-07, "loss": 0.9085, "step": 4243 }, { "epoch": 0.870207094525323, "grad_norm": 1.2437286376953125, "learning_rate": 8.708598489975728e-07, "loss": 0.8469, "step": 4244 }, { "epoch": 0.8704121386098012, "grad_norm": 1.2822067737579346, "learning_rate": 8.681509809626132e-07, "loss": 0.9227, "step": 4245 }, { "epoch": 0.8706171826942792, "grad_norm": 1.2045756578445435, "learning_rate": 8.654461413608928e-07, "loss": 0.8895, "step": 4246 }, { "epoch": 0.8708222267787574, "grad_norm": 1.1975458860397339, "learning_rate": 8.627453313856249e-07, "loss": 0.8001, "step": 4247 }, { "epoch": 0.8710272708632356, "grad_norm": 1.2222340106964111, "learning_rate": 8.600485522282464e-07, "loss": 0.8396, "step": 4248 }, { "epoch": 0.8712323149477138, "grad_norm": 1.293338418006897, "learning_rate": 8.57355805078417e-07, "loss": 0.8464, "step": 4249 }, { "epoch": 0.8714373590321919, "grad_norm": 1.246055006980896, "learning_rate": 8.546670911240196e-07, "loss": 0.7789, "step": 4250 }, { "epoch": 0.8716424031166701, "grad_norm": 1.310815691947937, "learning_rate": 8.519824115511566e-07, "loss": 0.9503, "step": 4251 }, { "epoch": 0.8718474472011483, "grad_norm": 1.358728051185608, "learning_rate": 8.493017675441495e-07, "loss": 0.8406, "step": 4252 }, { "epoch": 0.8720524912856265, "grad_norm": 1.1800625324249268, "learning_rate": 8.466251602855391e-07, "loss": 0.8189, "step": 4253 }, { "epoch": 0.8722575353701045, "grad_norm": 1.2911242246627808, "learning_rate": 8.439525909560875e-07, "loss": 0.8964, "step": 4254 }, { "epoch": 0.8724625794545827, "grad_norm": 1.285041093826294, "learning_rate": 8.41284060734775e-07, "loss": 0.8119, "step": 4255 }, { "epoch": 0.8726676235390609, "grad_norm": 1.3039085865020752, "learning_rate": 8.386195707987987e-07, "loss": 0.8882, "step": 4256 }, { "epoch": 0.8728726676235391, "grad_norm": 1.298984169960022, "learning_rate": 8.359591223235785e-07, "loss": 0.8716, "step": 4257 }, { "epoch": 0.8730777117080172, "grad_norm": 1.204761266708374, "learning_rate": 8.333027164827412e-07, "loss": 0.7808, "step": 4258 }, { "epoch": 0.8732827557924954, "grad_norm": 1.19073486328125, "learning_rate": 8.306503544481415e-07, "loss": 0.7937, "step": 4259 }, { "epoch": 0.8734877998769736, "grad_norm": 1.2374621629714966, "learning_rate": 8.280020373898401e-07, "loss": 0.8479, "step": 4260 }, { "epoch": 0.8736928439614517, "grad_norm": 1.234334945678711, "learning_rate": 8.253577664761259e-07, "loss": 0.8145, "step": 4261 }, { "epoch": 0.8738978880459298, "grad_norm": 1.327333927154541, "learning_rate": 8.227175428734868e-07, "loss": 0.9364, "step": 4262 }, { "epoch": 0.874102932130408, "grad_norm": 1.2282094955444336, "learning_rate": 8.200813677466391e-07, "loss": 0.8234, "step": 4263 }, { "epoch": 0.8743079762148862, "grad_norm": 1.191251277923584, "learning_rate": 8.174492422585045e-07, "loss": 0.8158, "step": 4264 }, { "epoch": 0.8745130202993644, "grad_norm": 1.2203468084335327, "learning_rate": 8.148211675702244e-07, "loss": 0.8598, "step": 4265 }, { "epoch": 0.8747180643838425, "grad_norm": 1.1915911436080933, "learning_rate": 8.121971448411504e-07, "loss": 0.8847, "step": 4266 }, { "epoch": 0.8749231084683207, "grad_norm": 1.3418627977371216, "learning_rate": 8.095771752288451e-07, "loss": 0.8568, "step": 4267 }, { "epoch": 0.8751281525527989, "grad_norm": 1.2750736474990845, "learning_rate": 8.069612598890852e-07, "loss": 0.8797, "step": 4268 }, { "epoch": 0.875333196637277, "grad_norm": 1.215309739112854, "learning_rate": 8.043493999758556e-07, "loss": 0.8938, "step": 4269 }, { "epoch": 0.8755382407217551, "grad_norm": 1.265525221824646, "learning_rate": 8.017415966413611e-07, "loss": 0.9289, "step": 4270 }, { "epoch": 0.8757432848062333, "grad_norm": 1.3641877174377441, "learning_rate": 7.991378510360038e-07, "loss": 0.966, "step": 4271 }, { "epoch": 0.8759483288907115, "grad_norm": 1.278849720954895, "learning_rate": 7.965381643084069e-07, "loss": 0.7848, "step": 4272 }, { "epoch": 0.8761533729751897, "grad_norm": 1.1524637937545776, "learning_rate": 7.939425376053955e-07, "loss": 0.8987, "step": 4273 }, { "epoch": 0.8763584170596679, "grad_norm": 1.1919833421707153, "learning_rate": 7.913509720720125e-07, "loss": 0.8989, "step": 4274 }, { "epoch": 0.876563461144146, "grad_norm": 1.2477924823760986, "learning_rate": 7.887634688515e-07, "loss": 0.8838, "step": 4275 }, { "epoch": 0.8767685052286242, "grad_norm": 1.2139484882354736, "learning_rate": 7.861800290853116e-07, "loss": 0.8563, "step": 4276 }, { "epoch": 0.8769735493131023, "grad_norm": 1.2177835702896118, "learning_rate": 7.83600653913108e-07, "loss": 0.8904, "step": 4277 }, { "epoch": 0.8771785933975805, "grad_norm": 1.2360764741897583, "learning_rate": 7.810253444727611e-07, "loss": 0.9295, "step": 4278 }, { "epoch": 0.8773836374820586, "grad_norm": 1.3324041366577148, "learning_rate": 7.784541019003422e-07, "loss": 0.8348, "step": 4279 }, { "epoch": 0.8775886815665368, "grad_norm": 1.2121018171310425, "learning_rate": 7.758869273301339e-07, "loss": 0.9122, "step": 4280 }, { "epoch": 0.877793725651015, "grad_norm": 1.256191611289978, "learning_rate": 7.733238218946193e-07, "loss": 0.9063, "step": 4281 }, { "epoch": 0.8779987697354932, "grad_norm": 1.2000936269760132, "learning_rate": 7.707647867244927e-07, "loss": 0.9163, "step": 4282 }, { "epoch": 0.8782038138199713, "grad_norm": 1.3013052940368652, "learning_rate": 7.682098229486478e-07, "loss": 0.9125, "step": 4283 }, { "epoch": 0.8784088579044494, "grad_norm": 1.212724208831787, "learning_rate": 7.656589316941843e-07, "loss": 0.7981, "step": 4284 }, { "epoch": 0.8786139019889276, "grad_norm": 1.2247954607009888, "learning_rate": 7.631121140864062e-07, "loss": 0.8412, "step": 4285 }, { "epoch": 0.8788189460734058, "grad_norm": 1.229393720626831, "learning_rate": 7.605693712488149e-07, "loss": 0.8631, "step": 4286 }, { "epoch": 0.8790239901578839, "grad_norm": 1.211737036705017, "learning_rate": 7.580307043031232e-07, "loss": 0.8387, "step": 4287 }, { "epoch": 0.8792290342423621, "grad_norm": 1.2259066104888916, "learning_rate": 7.554961143692385e-07, "loss": 0.9467, "step": 4288 }, { "epoch": 0.8794340783268403, "grad_norm": 1.2297813892364502, "learning_rate": 7.529656025652765e-07, "loss": 0.9066, "step": 4289 }, { "epoch": 0.8796391224113185, "grad_norm": 1.1933847665786743, "learning_rate": 7.504391700075441e-07, "loss": 0.8416, "step": 4290 }, { "epoch": 0.8798441664957966, "grad_norm": 1.2050938606262207, "learning_rate": 7.479168178105578e-07, "loss": 0.9428, "step": 4291 }, { "epoch": 0.8800492105802747, "grad_norm": 1.2190394401550293, "learning_rate": 7.453985470870284e-07, "loss": 0.8392, "step": 4292 }, { "epoch": 0.8802542546647529, "grad_norm": 1.2617554664611816, "learning_rate": 7.42884358947874e-07, "loss": 0.8987, "step": 4293 }, { "epoch": 0.8804592987492311, "grad_norm": 1.2988746166229248, "learning_rate": 7.403742545021986e-07, "loss": 0.879, "step": 4294 }, { "epoch": 0.8806643428337092, "grad_norm": 1.2706575393676758, "learning_rate": 7.378682348573163e-07, "loss": 0.893, "step": 4295 }, { "epoch": 0.8808693869181874, "grad_norm": 1.232078194618225, "learning_rate": 7.353663011187362e-07, "loss": 0.854, "step": 4296 }, { "epoch": 0.8810744310026656, "grad_norm": 1.289503574371338, "learning_rate": 7.328684543901598e-07, "loss": 0.9189, "step": 4297 }, { "epoch": 0.8812794750871438, "grad_norm": 1.313483715057373, "learning_rate": 7.303746957734936e-07, "loss": 0.9183, "step": 4298 }, { "epoch": 0.8814845191716218, "grad_norm": 1.3521406650543213, "learning_rate": 7.278850263688353e-07, "loss": 0.8468, "step": 4299 }, { "epoch": 0.8816895632561, "grad_norm": 1.3915915489196777, "learning_rate": 7.253994472744819e-07, "loss": 0.9693, "step": 4300 }, { "epoch": 0.8818946073405782, "grad_norm": 1.3369218111038208, "learning_rate": 7.229179595869195e-07, "loss": 0.8503, "step": 4301 }, { "epoch": 0.8820996514250564, "grad_norm": 1.299930453300476, "learning_rate": 7.204405644008416e-07, "loss": 0.8843, "step": 4302 }, { "epoch": 0.8823046955095345, "grad_norm": 1.3143885135650635, "learning_rate": 7.179672628091206e-07, "loss": 0.8886, "step": 4303 }, { "epoch": 0.8825097395940127, "grad_norm": 1.2080360651016235, "learning_rate": 7.154980559028379e-07, "loss": 0.8681, "step": 4304 }, { "epoch": 0.8827147836784909, "grad_norm": 1.3370577096939087, "learning_rate": 7.130329447712581e-07, "loss": 0.9275, "step": 4305 }, { "epoch": 0.8829198277629691, "grad_norm": 1.2419352531433105, "learning_rate": 7.105719305018443e-07, "loss": 0.8623, "step": 4306 }, { "epoch": 0.8831248718474471, "grad_norm": 1.2834923267364502, "learning_rate": 7.081150141802518e-07, "loss": 0.9503, "step": 4307 }, { "epoch": 0.8833299159319253, "grad_norm": 1.2093861103057861, "learning_rate": 7.056621968903266e-07, "loss": 0.8378, "step": 4308 }, { "epoch": 0.8835349600164035, "grad_norm": 1.3499125242233276, "learning_rate": 7.032134797141043e-07, "loss": 0.851, "step": 4309 }, { "epoch": 0.8837400041008817, "grad_norm": 1.1895473003387451, "learning_rate": 7.007688637318188e-07, "loss": 0.7549, "step": 4310 }, { "epoch": 0.8839450481853599, "grad_norm": 1.1919598579406738, "learning_rate": 6.983283500218885e-07, "loss": 0.8259, "step": 4311 }, { "epoch": 0.884150092269838, "grad_norm": 1.3462077379226685, "learning_rate": 6.958919396609231e-07, "loss": 0.9406, "step": 4312 }, { "epoch": 0.8843551363543162, "grad_norm": 1.284951090812683, "learning_rate": 6.934596337237232e-07, "loss": 0.9128, "step": 4313 }, { "epoch": 0.8845601804387944, "grad_norm": 1.2573989629745483, "learning_rate": 6.910314332832813e-07, "loss": 0.9383, "step": 4314 }, { "epoch": 0.8847652245232726, "grad_norm": 1.1918092966079712, "learning_rate": 6.886073394107751e-07, "loss": 0.8465, "step": 4315 }, { "epoch": 0.8849702686077506, "grad_norm": 1.2913970947265625, "learning_rate": 6.861873531755714e-07, "loss": 0.8728, "step": 4316 }, { "epoch": 0.8851753126922288, "grad_norm": 1.2487355470657349, "learning_rate": 6.837714756452241e-07, "loss": 0.8715, "step": 4317 }, { "epoch": 0.885380356776707, "grad_norm": 1.2242176532745361, "learning_rate": 6.813597078854772e-07, "loss": 0.8426, "step": 4318 }, { "epoch": 0.8855854008611852, "grad_norm": 1.217686414718628, "learning_rate": 6.789520509602609e-07, "loss": 0.8595, "step": 4319 }, { "epoch": 0.8857904449456633, "grad_norm": 1.2357759475708008, "learning_rate": 6.765485059316912e-07, "loss": 0.8778, "step": 4320 }, { "epoch": 0.8859954890301415, "grad_norm": 1.307164192199707, "learning_rate": 6.741490738600709e-07, "loss": 0.9645, "step": 4321 }, { "epoch": 0.8862005331146197, "grad_norm": 1.1894468069076538, "learning_rate": 6.717537558038845e-07, "loss": 0.8189, "step": 4322 }, { "epoch": 0.8864055771990978, "grad_norm": 1.3235282897949219, "learning_rate": 6.6936255281981e-07, "loss": 0.8677, "step": 4323 }, { "epoch": 0.8866106212835759, "grad_norm": 1.3341542482376099, "learning_rate": 6.669754659627003e-07, "loss": 0.8763, "step": 4324 }, { "epoch": 0.8868156653680541, "grad_norm": 1.2654786109924316, "learning_rate": 6.645924962856043e-07, "loss": 0.8004, "step": 4325 }, { "epoch": 0.8870207094525323, "grad_norm": 1.2628878355026245, "learning_rate": 6.622136448397398e-07, "loss": 0.8439, "step": 4326 }, { "epoch": 0.8872257535370105, "grad_norm": 1.1551804542541504, "learning_rate": 6.598389126745209e-07, "loss": 0.9126, "step": 4327 }, { "epoch": 0.8874307976214886, "grad_norm": 1.2571794986724854, "learning_rate": 6.574683008375371e-07, "loss": 0.8099, "step": 4328 }, { "epoch": 0.8876358417059668, "grad_norm": 1.2088242769241333, "learning_rate": 6.551018103745632e-07, "loss": 0.8229, "step": 4329 }, { "epoch": 0.887840885790445, "grad_norm": 1.2973973751068115, "learning_rate": 6.527394423295541e-07, "loss": 0.9024, "step": 4330 }, { "epoch": 0.8880459298749231, "grad_norm": 1.2472659349441528, "learning_rate": 6.503811977446506e-07, "loss": 0.8356, "step": 4331 }, { "epoch": 0.8882509739594012, "grad_norm": 1.4941359758377075, "learning_rate": 6.480270776601682e-07, "loss": 0.9739, "step": 4332 }, { "epoch": 0.8884560180438794, "grad_norm": 1.1449092626571655, "learning_rate": 6.456770831146042e-07, "loss": 0.8854, "step": 4333 }, { "epoch": 0.8886610621283576, "grad_norm": 1.2878930568695068, "learning_rate": 6.433312151446447e-07, "loss": 0.8871, "step": 4334 }, { "epoch": 0.8888661062128358, "grad_norm": 1.1188584566116333, "learning_rate": 6.409894747851386e-07, "loss": 0.8209, "step": 4335 }, { "epoch": 0.8890711502973139, "grad_norm": 1.3172892332077026, "learning_rate": 6.386518630691319e-07, "loss": 0.8471, "step": 4336 }, { "epoch": 0.8892761943817921, "grad_norm": 1.3043218851089478, "learning_rate": 6.36318381027835e-07, "loss": 1.0055, "step": 4337 }, { "epoch": 0.8894812384662703, "grad_norm": 1.3074368238449097, "learning_rate": 6.339890296906493e-07, "loss": 0.8877, "step": 4338 }, { "epoch": 0.8896862825507484, "grad_norm": 1.3137775659561157, "learning_rate": 6.316638100851391e-07, "loss": 0.9786, "step": 4339 }, { "epoch": 0.8898913266352265, "grad_norm": 1.2893462181091309, "learning_rate": 6.293427232370608e-07, "loss": 0.8843, "step": 4340 }, { "epoch": 0.8900963707197047, "grad_norm": 1.365407109260559, "learning_rate": 6.270257701703364e-07, "loss": 0.9091, "step": 4341 }, { "epoch": 0.8903014148041829, "grad_norm": 1.3145684003829956, "learning_rate": 6.247129519070728e-07, "loss": 0.882, "step": 4342 }, { "epoch": 0.8905064588886611, "grad_norm": 1.287549614906311, "learning_rate": 6.224042694675469e-07, "loss": 0.9095, "step": 4343 }, { "epoch": 0.8907115029731393, "grad_norm": 1.3823286294937134, "learning_rate": 6.200997238702145e-07, "loss": 0.934, "step": 4344 }, { "epoch": 0.8909165470576174, "grad_norm": 1.298783779144287, "learning_rate": 6.17799316131702e-07, "loss": 0.9625, "step": 4345 }, { "epoch": 0.8911215911420955, "grad_norm": 1.3037484884262085, "learning_rate": 6.155030472668177e-07, "loss": 0.9002, "step": 4346 }, { "epoch": 0.8913266352265737, "grad_norm": 1.3899328708648682, "learning_rate": 6.132109182885382e-07, "loss": 0.8801, "step": 4347 }, { "epoch": 0.8915316793110519, "grad_norm": 1.2481591701507568, "learning_rate": 6.109229302080155e-07, "loss": 0.8654, "step": 4348 }, { "epoch": 0.89173672339553, "grad_norm": 1.280078411102295, "learning_rate": 6.086390840345758e-07, "loss": 0.8599, "step": 4349 }, { "epoch": 0.8919417674800082, "grad_norm": 1.275270700454712, "learning_rate": 6.063593807757151e-07, "loss": 0.854, "step": 4350 }, { "epoch": 0.8921468115644864, "grad_norm": 1.1971580982208252, "learning_rate": 6.040838214371069e-07, "loss": 0.8831, "step": 4351 }, { "epoch": 0.8923518556489646, "grad_norm": 1.191185712814331, "learning_rate": 6.018124070225928e-07, "loss": 0.9332, "step": 4352 }, { "epoch": 0.8925568997334427, "grad_norm": 1.2016414403915405, "learning_rate": 5.995451385341855e-07, "loss": 0.8595, "step": 4353 }, { "epoch": 0.8927619438179208, "grad_norm": 1.2593722343444824, "learning_rate": 5.972820169720705e-07, "loss": 0.8033, "step": 4354 }, { "epoch": 0.892966987902399, "grad_norm": 1.2510864734649658, "learning_rate": 5.95023043334606e-07, "loss": 0.8151, "step": 4355 }, { "epoch": 0.8931720319868772, "grad_norm": 1.2021074295043945, "learning_rate": 5.92768218618317e-07, "loss": 0.8179, "step": 4356 }, { "epoch": 0.8933770760713553, "grad_norm": 1.3158106803894043, "learning_rate": 5.905175438178979e-07, "loss": 0.8697, "step": 4357 }, { "epoch": 0.8935821201558335, "grad_norm": 1.2961095571517944, "learning_rate": 5.882710199262121e-07, "loss": 0.7739, "step": 4358 }, { "epoch": 0.8937871642403117, "grad_norm": 1.2722910642623901, "learning_rate": 5.86028647934298e-07, "loss": 0.8974, "step": 4359 }, { "epoch": 0.8939922083247899, "grad_norm": 1.1346054077148438, "learning_rate": 5.837904288313545e-07, "loss": 0.7894, "step": 4360 }, { "epoch": 0.894197252409268, "grad_norm": 1.2502225637435913, "learning_rate": 5.815563636047539e-07, "loss": 0.8345, "step": 4361 }, { "epoch": 0.8944022964937461, "grad_norm": 1.3240137100219727, "learning_rate": 5.793264532400311e-07, "loss": 0.9013, "step": 4362 }, { "epoch": 0.8946073405782243, "grad_norm": 1.282681941986084, "learning_rate": 5.77100698720895e-07, "loss": 0.8645, "step": 4363 }, { "epoch": 0.8948123846627025, "grad_norm": 1.2522019147872925, "learning_rate": 5.748791010292143e-07, "loss": 0.8952, "step": 4364 }, { "epoch": 0.8950174287471806, "grad_norm": 1.2311521768569946, "learning_rate": 5.726616611450275e-07, "loss": 0.9011, "step": 4365 }, { "epoch": 0.8952224728316588, "grad_norm": 1.2936757802963257, "learning_rate": 5.704483800465388e-07, "loss": 0.8462, "step": 4366 }, { "epoch": 0.895427516916137, "grad_norm": 1.3615728616714478, "learning_rate": 5.68239258710116e-07, "loss": 0.8918, "step": 4367 }, { "epoch": 0.8956325610006152, "grad_norm": 1.2557873725891113, "learning_rate": 5.660342981102962e-07, "loss": 0.873, "step": 4368 }, { "epoch": 0.8958376050850932, "grad_norm": 1.1976110935211182, "learning_rate": 5.638334992197736e-07, "loss": 0.8533, "step": 4369 }, { "epoch": 0.8960426491695714, "grad_norm": 1.2575809955596924, "learning_rate": 5.61636863009416e-07, "loss": 0.8255, "step": 4370 }, { "epoch": 0.8962476932540496, "grad_norm": 1.1756848096847534, "learning_rate": 5.594443904482439e-07, "loss": 0.8941, "step": 4371 }, { "epoch": 0.8964527373385278, "grad_norm": 1.2477672100067139, "learning_rate": 5.572560825034523e-07, "loss": 0.8838, "step": 4372 }, { "epoch": 0.8966577814230059, "grad_norm": 1.1484012603759766, "learning_rate": 5.550719401403892e-07, "loss": 0.848, "step": 4373 }, { "epoch": 0.8968628255074841, "grad_norm": 1.1824703216552734, "learning_rate": 5.528919643225749e-07, "loss": 0.8641, "step": 4374 }, { "epoch": 0.8970678695919623, "grad_norm": 1.3200446367263794, "learning_rate": 5.507161560116791e-07, "loss": 0.8414, "step": 4375 }, { "epoch": 0.8972729136764405, "grad_norm": 1.3006681203842163, "learning_rate": 5.485445161675462e-07, "loss": 0.9305, "step": 4376 }, { "epoch": 0.8974779577609185, "grad_norm": 1.2699763774871826, "learning_rate": 5.463770457481732e-07, "loss": 0.8847, "step": 4377 }, { "epoch": 0.8976830018453967, "grad_norm": 1.2313759326934814, "learning_rate": 5.442137457097185e-07, "loss": 0.8083, "step": 4378 }, { "epoch": 0.8978880459298749, "grad_norm": 1.2322601079940796, "learning_rate": 5.420546170065055e-07, "loss": 0.9611, "step": 4379 }, { "epoch": 0.8980930900143531, "grad_norm": 1.2663745880126953, "learning_rate": 5.398996605910123e-07, "loss": 0.8541, "step": 4380 }, { "epoch": 0.8982981340988313, "grad_norm": 1.2321852445602417, "learning_rate": 5.377488774138806e-07, "loss": 0.8586, "step": 4381 }, { "epoch": 0.8985031781833094, "grad_norm": 1.1781768798828125, "learning_rate": 5.35602268423906e-07, "loss": 0.8721, "step": 4382 }, { "epoch": 0.8987082222677876, "grad_norm": 1.3858755826950073, "learning_rate": 5.334598345680508e-07, "loss": 0.8575, "step": 4383 }, { "epoch": 0.8989132663522658, "grad_norm": 1.1867939233779907, "learning_rate": 5.313215767914248e-07, "loss": 0.8781, "step": 4384 }, { "epoch": 0.899118310436744, "grad_norm": 1.3391231298446655, "learning_rate": 5.291874960373045e-07, "loss": 0.8984, "step": 4385 }, { "epoch": 0.899323354521222, "grad_norm": 1.2571260929107666, "learning_rate": 5.270575932471189e-07, "loss": 0.9516, "step": 4386 }, { "epoch": 0.8995283986057002, "grad_norm": 1.3601080179214478, "learning_rate": 5.249318693604577e-07, "loss": 0.8852, "step": 4387 }, { "epoch": 0.8997334426901784, "grad_norm": 1.2456592321395874, "learning_rate": 5.22810325315064e-07, "loss": 0.8766, "step": 4388 }, { "epoch": 0.8999384867746566, "grad_norm": 1.1998636722564697, "learning_rate": 5.20692962046837e-07, "loss": 0.8763, "step": 4389 }, { "epoch": 0.9001435308591347, "grad_norm": 1.219303846359253, "learning_rate": 5.185797804898318e-07, "loss": 0.8319, "step": 4390 }, { "epoch": 0.9003485749436129, "grad_norm": 1.2016181945800781, "learning_rate": 5.164707815762626e-07, "loss": 0.8917, "step": 4391 }, { "epoch": 0.9005536190280911, "grad_norm": 1.2473862171173096, "learning_rate": 5.143659662364931e-07, "loss": 0.826, "step": 4392 }, { "epoch": 0.9007586631125692, "grad_norm": 1.2652077674865723, "learning_rate": 5.122653353990437e-07, "loss": 0.8686, "step": 4393 }, { "epoch": 0.9009637071970473, "grad_norm": 1.1327050924301147, "learning_rate": 5.101688899905887e-07, "loss": 0.7557, "step": 4394 }, { "epoch": 0.9011687512815255, "grad_norm": 1.24934720993042, "learning_rate": 5.080766309359575e-07, "loss": 0.7993, "step": 4395 }, { "epoch": 0.9013737953660037, "grad_norm": 1.2320773601531982, "learning_rate": 5.059885591581304e-07, "loss": 0.9195, "step": 4396 }, { "epoch": 0.9015788394504819, "grad_norm": 1.333971619606018, "learning_rate": 5.039046755782417e-07, "loss": 0.8345, "step": 4397 }, { "epoch": 0.90178388353496, "grad_norm": 1.1911160945892334, "learning_rate": 5.018249811155773e-07, "loss": 0.8564, "step": 4398 }, { "epoch": 0.9019889276194382, "grad_norm": 1.2704828977584839, "learning_rate": 4.997494766875765e-07, "loss": 0.8686, "step": 4399 }, { "epoch": 0.9021939717039164, "grad_norm": 1.2428042888641357, "learning_rate": 4.976781632098293e-07, "loss": 0.9182, "step": 4400 }, { "epoch": 0.9023990157883945, "grad_norm": 1.2787052392959595, "learning_rate": 4.956110415960779e-07, "loss": 0.8435, "step": 4401 }, { "epoch": 0.9026040598728726, "grad_norm": 1.2184784412384033, "learning_rate": 4.935481127582131e-07, "loss": 0.8869, "step": 4402 }, { "epoch": 0.9028091039573508, "grad_norm": 1.2575321197509766, "learning_rate": 4.914893776062768e-07, "loss": 0.9004, "step": 4403 }, { "epoch": 0.903014148041829, "grad_norm": 1.1418884992599487, "learning_rate": 4.894348370484648e-07, "loss": 0.809, "step": 4404 }, { "epoch": 0.9032191921263072, "grad_norm": 1.2502343654632568, "learning_rate": 4.873844919911152e-07, "loss": 0.9245, "step": 4405 }, { "epoch": 0.9034242362107853, "grad_norm": 1.377091646194458, "learning_rate": 4.853383433387237e-07, "loss": 0.9561, "step": 4406 }, { "epoch": 0.9036292802952635, "grad_norm": 1.3172470331192017, "learning_rate": 4.83296391993926e-07, "loss": 0.8804, "step": 4407 }, { "epoch": 0.9038343243797416, "grad_norm": 1.3452873229980469, "learning_rate": 4.812586388575125e-07, "loss": 0.8286, "step": 4408 }, { "epoch": 0.9040393684642198, "grad_norm": 1.2863438129425049, "learning_rate": 4.792250848284208e-07, "loss": 0.831, "step": 4409 }, { "epoch": 0.9042444125486979, "grad_norm": 1.2382211685180664, "learning_rate": 4.771957308037345e-07, "loss": 0.9156, "step": 4410 }, { "epoch": 0.9044494566331761, "grad_norm": 1.2973533868789673, "learning_rate": 4.7517057767868193e-07, "loss": 0.7833, "step": 4411 }, { "epoch": 0.9046545007176543, "grad_norm": 1.267555832862854, "learning_rate": 4.7314962634664616e-07, "loss": 0.8301, "step": 4412 }, { "epoch": 0.9048595448021325, "grad_norm": 1.1693958044052124, "learning_rate": 4.7113287769914864e-07, "loss": 0.8799, "step": 4413 }, { "epoch": 0.9050645888866106, "grad_norm": 1.2631844282150269, "learning_rate": 4.6912033262585866e-07, "loss": 0.8995, "step": 4414 }, { "epoch": 0.9052696329710888, "grad_norm": 1.3644771575927734, "learning_rate": 4.6711199201459833e-07, "loss": 0.9235, "step": 4415 }, { "epoch": 0.905474677055567, "grad_norm": 1.2967671155929565, "learning_rate": 4.651078567513212e-07, "loss": 0.9183, "step": 4416 }, { "epoch": 0.9056797211400451, "grad_norm": 1.1696008443832397, "learning_rate": 4.631079277201389e-07, "loss": 0.8954, "step": 4417 }, { "epoch": 0.9058847652245233, "grad_norm": 1.333626389503479, "learning_rate": 4.611122058033002e-07, "loss": 0.911, "step": 4418 }, { "epoch": 0.9060898093090014, "grad_norm": 1.2339637279510498, "learning_rate": 4.591206918812019e-07, "loss": 0.8907, "step": 4419 }, { "epoch": 0.9062948533934796, "grad_norm": 1.2748000621795654, "learning_rate": 4.571333868323791e-07, "loss": 0.9216, "step": 4420 }, { "epoch": 0.9064998974779578, "grad_norm": 1.3250389099121094, "learning_rate": 4.551502915335171e-07, "loss": 0.8537, "step": 4421 }, { "epoch": 0.906704941562436, "grad_norm": 1.2647972106933594, "learning_rate": 4.5317140685943726e-07, "loss": 0.8394, "step": 4422 }, { "epoch": 0.906909985646914, "grad_norm": 1.2034505605697632, "learning_rate": 4.511967336831091e-07, "loss": 0.9171, "step": 4423 }, { "epoch": 0.9071150297313922, "grad_norm": 1.306334376335144, "learning_rate": 4.492262728756425e-07, "loss": 0.8116, "step": 4424 }, { "epoch": 0.9073200738158704, "grad_norm": 1.2332963943481445, "learning_rate": 4.472600253062875e-07, "loss": 0.9002, "step": 4425 }, { "epoch": 0.9075251179003486, "grad_norm": 1.1894028186798096, "learning_rate": 4.4529799184243495e-07, "loss": 0.8724, "step": 4426 }, { "epoch": 0.9077301619848267, "grad_norm": 1.4112248420715332, "learning_rate": 4.433401733496201e-07, "loss": 0.8693, "step": 4427 }, { "epoch": 0.9079352060693049, "grad_norm": 1.2492507696151733, "learning_rate": 4.413865706915177e-07, "loss": 0.8745, "step": 4428 }, { "epoch": 0.9081402501537831, "grad_norm": 1.2714295387268066, "learning_rate": 4.394371847299417e-07, "loss": 0.8745, "step": 4429 }, { "epoch": 0.9083452942382613, "grad_norm": 1.254710078239441, "learning_rate": 4.3749201632484417e-07, "loss": 0.7936, "step": 4430 }, { "epoch": 0.9085503383227393, "grad_norm": 1.2465006113052368, "learning_rate": 4.355510663343199e-07, "loss": 0.8507, "step": 4431 }, { "epoch": 0.9087553824072175, "grad_norm": 1.277958869934082, "learning_rate": 4.3361433561460274e-07, "loss": 0.7767, "step": 4432 }, { "epoch": 0.9089604264916957, "grad_norm": 1.171824336051941, "learning_rate": 4.3168182502006384e-07, "loss": 0.8585, "step": 4433 }, { "epoch": 0.9091654705761739, "grad_norm": 1.3741456270217896, "learning_rate": 4.297535354032112e-07, "loss": 0.9559, "step": 4434 }, { "epoch": 0.909370514660652, "grad_norm": 1.2520829439163208, "learning_rate": 4.2782946761469325e-07, "loss": 0.8425, "step": 4435 }, { "epoch": 0.9095755587451302, "grad_norm": 1.2578203678131104, "learning_rate": 4.259096225032955e-07, "loss": 0.8706, "step": 4436 }, { "epoch": 0.9097806028296084, "grad_norm": 1.2094908952713013, "learning_rate": 4.2399400091594154e-07, "loss": 0.952, "step": 4437 }, { "epoch": 0.9099856469140866, "grad_norm": 1.294121265411377, "learning_rate": 4.220826036976888e-07, "loss": 0.8639, "step": 4438 }, { "epoch": 0.9101906909985646, "grad_norm": 1.3488529920578003, "learning_rate": 4.2017543169173167e-07, "loss": 0.805, "step": 4439 }, { "epoch": 0.9103957350830428, "grad_norm": 1.3391317129135132, "learning_rate": 4.1827248573940604e-07, "loss": 0.9063, "step": 4440 }, { "epoch": 0.910600779167521, "grad_norm": 1.2048852443695068, "learning_rate": 4.1637376668017706e-07, "loss": 0.8607, "step": 4441 }, { "epoch": 0.9108058232519992, "grad_norm": 1.316481590270996, "learning_rate": 4.14479275351648e-07, "loss": 0.9583, "step": 4442 }, { "epoch": 0.9110108673364773, "grad_norm": 1.2052688598632812, "learning_rate": 4.12589012589556e-07, "loss": 0.8767, "step": 4443 }, { "epoch": 0.9112159114209555, "grad_norm": 1.237836241722107, "learning_rate": 4.1070297922777505e-07, "loss": 0.9312, "step": 4444 }, { "epoch": 0.9114209555054337, "grad_norm": 1.1567269563674927, "learning_rate": 4.088211760983096e-07, "loss": 0.8241, "step": 4445 }, { "epoch": 0.9116259995899119, "grad_norm": 1.2415292263031006, "learning_rate": 4.069436040313024e-07, "loss": 0.8632, "step": 4446 }, { "epoch": 0.9118310436743899, "grad_norm": 1.222610354423523, "learning_rate": 4.0507026385502747e-07, "loss": 0.9004, "step": 4447 }, { "epoch": 0.9120360877588681, "grad_norm": 1.2811239957809448, "learning_rate": 4.032011563958893e-07, "loss": 0.8343, "step": 4448 }, { "epoch": 0.9122411318433463, "grad_norm": 1.1851816177368164, "learning_rate": 4.013362824784306e-07, "loss": 0.8344, "step": 4449 }, { "epoch": 0.9124461759278245, "grad_norm": 1.2984344959259033, "learning_rate": 3.9947564292532213e-07, "loss": 0.8471, "step": 4450 }, { "epoch": 0.9126512200123027, "grad_norm": 1.3662593364715576, "learning_rate": 3.9761923855737073e-07, "loss": 0.8849, "step": 4451 }, { "epoch": 0.9128562640967808, "grad_norm": 1.2905231714248657, "learning_rate": 3.9576707019350903e-07, "loss": 0.8493, "step": 4452 }, { "epoch": 0.913061308181259, "grad_norm": 1.298091173171997, "learning_rate": 3.939191386508079e-07, "loss": 0.8383, "step": 4453 }, { "epoch": 0.9132663522657372, "grad_norm": 1.2273625135421753, "learning_rate": 3.92075444744463e-07, "loss": 0.8771, "step": 4454 }, { "epoch": 0.9134713963502153, "grad_norm": 1.29596745967865, "learning_rate": 3.9023598928780495e-07, "loss": 0.8783, "step": 4455 }, { "epoch": 0.9136764404346934, "grad_norm": 1.2831368446350098, "learning_rate": 3.884007730922934e-07, "loss": 0.8824, "step": 4456 }, { "epoch": 0.9138814845191716, "grad_norm": 1.17350435256958, "learning_rate": 3.865697969675164e-07, "loss": 0.8079, "step": 4457 }, { "epoch": 0.9140865286036498, "grad_norm": 1.2412747144699097, "learning_rate": 3.8474306172119226e-07, "loss": 0.8207, "step": 4458 }, { "epoch": 0.914291572688128, "grad_norm": 1.1661136150360107, "learning_rate": 3.8292056815916965e-07, "loss": 0.8405, "step": 4459 }, { "epoch": 0.9144966167726061, "grad_norm": 1.2793089151382446, "learning_rate": 3.811023170854256e-07, "loss": 0.8813, "step": 4460 }, { "epoch": 0.9147016608570843, "grad_norm": 1.3225795030593872, "learning_rate": 3.7928830930206493e-07, "loss": 0.8936, "step": 4461 }, { "epoch": 0.9149067049415625, "grad_norm": 1.2135087251663208, "learning_rate": 3.7747854560931996e-07, "loss": 0.8685, "step": 4462 }, { "epoch": 0.9151117490260406, "grad_norm": 1.348022222518921, "learning_rate": 3.7567302680555194e-07, "loss": 0.8364, "step": 4463 }, { "epoch": 0.9153167931105187, "grad_norm": 1.2499092817306519, "learning_rate": 3.7387175368725157e-07, "loss": 0.8674, "step": 4464 }, { "epoch": 0.9155218371949969, "grad_norm": 1.30613112449646, "learning_rate": 3.7207472704903216e-07, "loss": 0.8695, "step": 4465 }, { "epoch": 0.9157268812794751, "grad_norm": 1.2090438604354858, "learning_rate": 3.7028194768363614e-07, "loss": 0.8541, "step": 4466 }, { "epoch": 0.9159319253639533, "grad_norm": 1.1642957925796509, "learning_rate": 3.684934163819309e-07, "loss": 0.9284, "step": 4467 }, { "epoch": 0.9161369694484314, "grad_norm": 1.2165474891662598, "learning_rate": 3.667091339329143e-07, "loss": 0.8942, "step": 4468 }, { "epoch": 0.9163420135329096, "grad_norm": 1.2948554754257202, "learning_rate": 3.649291011237033e-07, "loss": 0.7932, "step": 4469 }, { "epoch": 0.9165470576173878, "grad_norm": 1.1924505233764648, "learning_rate": 3.631533187395453e-07, "loss": 0.8046, "step": 4470 }, { "epoch": 0.9167521017018659, "grad_norm": 1.3821035623550415, "learning_rate": 3.6138178756380815e-07, "loss": 0.9007, "step": 4471 }, { "epoch": 0.916957145786344, "grad_norm": 1.1819382905960083, "learning_rate": 3.596145083779912e-07, "loss": 0.8563, "step": 4472 }, { "epoch": 0.9171621898708222, "grad_norm": 1.3807649612426758, "learning_rate": 3.578514819617107e-07, "loss": 0.943, "step": 4473 }, { "epoch": 0.9173672339553004, "grad_norm": 1.2330390214920044, "learning_rate": 3.5609270909271134e-07, "loss": 0.8969, "step": 4474 }, { "epoch": 0.9175722780397786, "grad_norm": 1.310711145401001, "learning_rate": 3.543381905468568e-07, "loss": 0.8193, "step": 4475 }, { "epoch": 0.9177773221242567, "grad_norm": 1.3008917570114136, "learning_rate": 3.5258792709814137e-07, "loss": 0.9203, "step": 4476 }, { "epoch": 0.9179823662087349, "grad_norm": 1.1396777629852295, "learning_rate": 3.508419195186774e-07, "loss": 0.8752, "step": 4477 }, { "epoch": 0.918187410293213, "grad_norm": 1.4632562398910522, "learning_rate": 3.491001685786988e-07, "loss": 0.8929, "step": 4478 }, { "epoch": 0.9183924543776912, "grad_norm": 1.3413300514221191, "learning_rate": 3.473626750465642e-07, "loss": 0.8808, "step": 4479 }, { "epoch": 0.9185974984621693, "grad_norm": 1.2862504720687866, "learning_rate": 3.4562943968875164e-07, "loss": 0.9071, "step": 4480 }, { "epoch": 0.9188025425466475, "grad_norm": 1.1653521060943604, "learning_rate": 3.4390046326986506e-07, "loss": 0.8551, "step": 4481 }, { "epoch": 0.9190075866311257, "grad_norm": 1.3711013793945312, "learning_rate": 3.421757465526243e-07, "loss": 0.904, "step": 4482 }, { "epoch": 0.9192126307156039, "grad_norm": 1.287007451057434, "learning_rate": 3.404552902978764e-07, "loss": 0.9212, "step": 4483 }, { "epoch": 0.919417674800082, "grad_norm": 1.284903883934021, "learning_rate": 3.3873909526458083e-07, "loss": 0.8536, "step": 4484 }, { "epoch": 0.9196227188845602, "grad_norm": 1.25301992893219, "learning_rate": 3.370271622098242e-07, "loss": 0.9062, "step": 4485 }, { "epoch": 0.9198277629690383, "grad_norm": 1.1434870958328247, "learning_rate": 3.353194918888092e-07, "loss": 0.8858, "step": 4486 }, { "epoch": 0.9200328070535165, "grad_norm": 1.2983989715576172, "learning_rate": 3.33616085054862e-07, "loss": 0.8808, "step": 4487 }, { "epoch": 0.9202378511379947, "grad_norm": 1.2270197868347168, "learning_rate": 3.319169424594204e-07, "loss": 0.7691, "step": 4488 }, { "epoch": 0.9204428952224728, "grad_norm": 1.2231273651123047, "learning_rate": 3.302220648520493e-07, "loss": 0.8053, "step": 4489 }, { "epoch": 0.920647939306951, "grad_norm": 1.218152403831482, "learning_rate": 3.2853145298042954e-07, "loss": 0.8784, "step": 4490 }, { "epoch": 0.9208529833914292, "grad_norm": 1.2188615798950195, "learning_rate": 3.268451075903556e-07, "loss": 0.8918, "step": 4491 }, { "epoch": 0.9210580274759074, "grad_norm": 1.2687867879867554, "learning_rate": 3.2516302942574794e-07, "loss": 0.8717, "step": 4492 }, { "epoch": 0.9212630715603854, "grad_norm": 1.3036953210830688, "learning_rate": 3.234852192286375e-07, "loss": 0.9534, "step": 4493 }, { "epoch": 0.9214681156448636, "grad_norm": 1.3652997016906738, "learning_rate": 3.218116777391755e-07, "loss": 0.9193, "step": 4494 }, { "epoch": 0.9216731597293418, "grad_norm": 1.257516622543335, "learning_rate": 3.2014240569563036e-07, "loss": 0.8677, "step": 4495 }, { "epoch": 0.92187820381382, "grad_norm": 1.423225998878479, "learning_rate": 3.184774038343874e-07, "loss": 0.9361, "step": 4496 }, { "epoch": 0.9220832478982981, "grad_norm": 1.2631131410598755, "learning_rate": 3.1681667288994353e-07, "loss": 0.8988, "step": 4497 }, { "epoch": 0.9222882919827763, "grad_norm": 1.310631513595581, "learning_rate": 3.1516021359492053e-07, "loss": 0.844, "step": 4498 }, { "epoch": 0.9224933360672545, "grad_norm": 1.178169846534729, "learning_rate": 3.13508026680045e-07, "loss": 0.8259, "step": 4499 }, { "epoch": 0.9226983801517327, "grad_norm": 1.2019528150558472, "learning_rate": 3.118601128741694e-07, "loss": 0.8569, "step": 4500 }, { "epoch": 0.9229034242362107, "grad_norm": 1.2633264064788818, "learning_rate": 3.102164729042545e-07, "loss": 0.8279, "step": 4501 }, { "epoch": 0.9231084683206889, "grad_norm": 1.4699537754058838, "learning_rate": 3.0857710749537585e-07, "loss": 0.8479, "step": 4502 }, { "epoch": 0.9233135124051671, "grad_norm": 1.1894359588623047, "learning_rate": 3.069420173707249e-07, "loss": 0.8577, "step": 4503 }, { "epoch": 0.9235185564896453, "grad_norm": 1.335996150970459, "learning_rate": 3.053112032516092e-07, "loss": 0.81, "step": 4504 }, { "epoch": 0.9237236005741234, "grad_norm": 1.3111982345581055, "learning_rate": 3.036846658574455e-07, "loss": 0.9168, "step": 4505 }, { "epoch": 0.9239286446586016, "grad_norm": 1.3043838739395142, "learning_rate": 3.020624059057675e-07, "loss": 0.8126, "step": 4506 }, { "epoch": 0.9241336887430798, "grad_norm": 1.1565274000167847, "learning_rate": 3.0044442411222066e-07, "loss": 0.8503, "step": 4507 }, { "epoch": 0.924338732827558, "grad_norm": 1.2556357383728027, "learning_rate": 2.9883072119056055e-07, "loss": 0.7364, "step": 4508 }, { "epoch": 0.924543776912036, "grad_norm": 1.3905823230743408, "learning_rate": 2.972212978526623e-07, "loss": 0.7463, "step": 4509 }, { "epoch": 0.9247488209965142, "grad_norm": 1.260475993156433, "learning_rate": 2.9561615480850456e-07, "loss": 0.8209, "step": 4510 }, { "epoch": 0.9249538650809924, "grad_norm": 1.1825456619262695, "learning_rate": 2.9401529276618434e-07, "loss": 0.8546, "step": 4511 }, { "epoch": 0.9251589091654706, "grad_norm": 1.299703598022461, "learning_rate": 2.9241871243190555e-07, "loss": 0.8283, "step": 4512 }, { "epoch": 0.9253639532499487, "grad_norm": 1.3229396343231201, "learning_rate": 2.908264145099859e-07, "loss": 0.9244, "step": 4513 }, { "epoch": 0.9255689973344269, "grad_norm": 1.238745093345642, "learning_rate": 2.8923839970285473e-07, "loss": 0.8707, "step": 4514 }, { "epoch": 0.9257740414189051, "grad_norm": 1.27024245262146, "learning_rate": 2.876546687110493e-07, "loss": 0.8656, "step": 4515 }, { "epoch": 0.9259790855033833, "grad_norm": 1.2239010334014893, "learning_rate": 2.860752222332186e-07, "loss": 0.8469, "step": 4516 }, { "epoch": 0.9261841295878613, "grad_norm": 1.2763746976852417, "learning_rate": 2.845000609661208e-07, "loss": 0.8647, "step": 4517 }, { "epoch": 0.9263891736723395, "grad_norm": 1.240524411201477, "learning_rate": 2.8292918560462346e-07, "loss": 0.8387, "step": 4518 }, { "epoch": 0.9265942177568177, "grad_norm": 1.2213166952133179, "learning_rate": 2.813625968417077e-07, "loss": 0.8157, "step": 4519 }, { "epoch": 0.9267992618412959, "grad_norm": 1.2613388299942017, "learning_rate": 2.798002953684553e-07, "loss": 0.8796, "step": 4520 }, { "epoch": 0.927004305925774, "grad_norm": 1.3667573928833008, "learning_rate": 2.782422818740649e-07, "loss": 0.9376, "step": 4521 }, { "epoch": 0.9272093500102522, "grad_norm": 1.2454394102096558, "learning_rate": 2.7668855704583997e-07, "loss": 0.8616, "step": 4522 }, { "epoch": 0.9274143940947304, "grad_norm": 1.3069651126861572, "learning_rate": 2.751391215691901e-07, "loss": 0.8595, "step": 4523 }, { "epoch": 0.9276194381792086, "grad_norm": 1.2756271362304688, "learning_rate": 2.735939761276352e-07, "loss": 0.8635, "step": 4524 }, { "epoch": 0.9278244822636867, "grad_norm": 1.2852973937988281, "learning_rate": 2.720531214028055e-07, "loss": 0.9037, "step": 4525 }, { "epoch": 0.9280295263481648, "grad_norm": 1.3378267288208008, "learning_rate": 2.705165580744318e-07, "loss": 0.8606, "step": 4526 }, { "epoch": 0.928234570432643, "grad_norm": 1.3349868059158325, "learning_rate": 2.689842868203563e-07, "loss": 0.9061, "step": 4527 }, { "epoch": 0.9284396145171212, "grad_norm": 1.2713695764541626, "learning_rate": 2.6745630831652826e-07, "loss": 0.8936, "step": 4528 }, { "epoch": 0.9286446586015994, "grad_norm": 1.2722750902175903, "learning_rate": 2.659326232369974e-07, "loss": 0.9023, "step": 4529 }, { "epoch": 0.9288497026860775, "grad_norm": 1.2420580387115479, "learning_rate": 2.6441323225392946e-07, "loss": 0.9005, "step": 4530 }, { "epoch": 0.9290547467705557, "grad_norm": 1.322798728942871, "learning_rate": 2.6289813603758483e-07, "loss": 0.9163, "step": 4531 }, { "epoch": 0.9292597908550339, "grad_norm": 1.2146927118301392, "learning_rate": 2.6138733525633896e-07, "loss": 0.8876, "step": 4532 }, { "epoch": 0.929464834939512, "grad_norm": 1.2532416582107544, "learning_rate": 2.5988083057666534e-07, "loss": 0.8563, "step": 4533 }, { "epoch": 0.9296698790239901, "grad_norm": 1.1718672513961792, "learning_rate": 2.5837862266314574e-07, "loss": 0.8313, "step": 4534 }, { "epoch": 0.9298749231084683, "grad_norm": 1.2516623735427856, "learning_rate": 2.5688071217846444e-07, "loss": 0.9038, "step": 4535 }, { "epoch": 0.9300799671929465, "grad_norm": 1.239755392074585, "learning_rate": 2.55387099783414e-07, "loss": 0.8879, "step": 4536 }, { "epoch": 0.9302850112774247, "grad_norm": 1.1850943565368652, "learning_rate": 2.5389778613688744e-07, "loss": 0.8314, "step": 4537 }, { "epoch": 0.9304900553619028, "grad_norm": 1.2430781126022339, "learning_rate": 2.524127718958802e-07, "loss": 0.873, "step": 4538 }, { "epoch": 0.930695099446381, "grad_norm": 1.3689095973968506, "learning_rate": 2.509320577154939e-07, "loss": 0.8694, "step": 4539 }, { "epoch": 0.9309001435308591, "grad_norm": 1.309195876121521, "learning_rate": 2.4945564424893265e-07, "loss": 0.9251, "step": 4540 }, { "epoch": 0.9311051876153373, "grad_norm": 1.2619818449020386, "learning_rate": 2.4798353214750325e-07, "loss": 0.9054, "step": 4541 }, { "epoch": 0.9313102316998154, "grad_norm": 1.2701376676559448, "learning_rate": 2.46515722060614e-07, "loss": 0.8853, "step": 4542 }, { "epoch": 0.9315152757842936, "grad_norm": 1.2418934106826782, "learning_rate": 2.4505221463577586e-07, "loss": 0.7827, "step": 4543 }, { "epoch": 0.9317203198687718, "grad_norm": 1.2249329090118408, "learning_rate": 2.435930105186024e-07, "loss": 0.8472, "step": 4544 }, { "epoch": 0.93192536395325, "grad_norm": 1.3398040533065796, "learning_rate": 2.4213811035280976e-07, "loss": 0.7794, "step": 4545 }, { "epoch": 0.9321304080377281, "grad_norm": 1.2435481548309326, "learning_rate": 2.406875147802135e-07, "loss": 0.9157, "step": 4546 }, { "epoch": 0.9323354521222063, "grad_norm": 1.3597077131271362, "learning_rate": 2.392412244407294e-07, "loss": 0.9089, "step": 4547 }, { "epoch": 0.9325404962066844, "grad_norm": 1.2207516431808472, "learning_rate": 2.3779923997237497e-07, "loss": 0.8386, "step": 4548 }, { "epoch": 0.9327455402911626, "grad_norm": 1.266139030456543, "learning_rate": 2.3636156201127137e-07, "loss": 0.9278, "step": 4549 }, { "epoch": 0.9329505843756407, "grad_norm": 1.0952273607254028, "learning_rate": 2.3492819119163679e-07, "loss": 0.8341, "step": 4550 }, { "epoch": 0.9331556284601189, "grad_norm": 1.176849126815796, "learning_rate": 2.3349912814578878e-07, "loss": 0.9003, "step": 4551 }, { "epoch": 0.9333606725445971, "grad_norm": 1.1984113454818726, "learning_rate": 2.3207437350414418e-07, "loss": 0.9078, "step": 4552 }, { "epoch": 0.9335657166290753, "grad_norm": 1.1690648794174194, "learning_rate": 2.3065392789522467e-07, "loss": 0.7723, "step": 4553 }, { "epoch": 0.9337707607135534, "grad_norm": 1.1248048543930054, "learning_rate": 2.2923779194564454e-07, "loss": 0.9132, "step": 4554 }, { "epoch": 0.9339758047980316, "grad_norm": 1.2972559928894043, "learning_rate": 2.2782596628012076e-07, "loss": 0.9093, "step": 4555 }, { "epoch": 0.9341808488825097, "grad_norm": 1.2743008136749268, "learning_rate": 2.2641845152146514e-07, "loss": 0.8899, "step": 4556 }, { "epoch": 0.9343858929669879, "grad_norm": 1.2316393852233887, "learning_rate": 2.2501524829059208e-07, "loss": 0.8413, "step": 4557 }, { "epoch": 0.9345909370514661, "grad_norm": 1.2217572927474976, "learning_rate": 2.2361635720651199e-07, "loss": 0.868, "step": 4558 }, { "epoch": 0.9347959811359442, "grad_norm": 1.357150673866272, "learning_rate": 2.2222177888633345e-07, "loss": 0.8993, "step": 4559 }, { "epoch": 0.9350010252204224, "grad_norm": 1.307058572769165, "learning_rate": 2.2083151394526215e-07, "loss": 0.8875, "step": 4560 }, { "epoch": 0.9352060693049006, "grad_norm": 1.2693073749542236, "learning_rate": 2.1944556299659858e-07, "loss": 0.9558, "step": 4561 }, { "epoch": 0.9354111133893788, "grad_norm": 1.2166670560836792, "learning_rate": 2.180639266517448e-07, "loss": 0.8708, "step": 4562 }, { "epoch": 0.9356161574738568, "grad_norm": 1.281575083732605, "learning_rate": 2.166866055201966e-07, "loss": 0.8787, "step": 4563 }, { "epoch": 0.935821201558335, "grad_norm": 1.3386521339416504, "learning_rate": 2.1531360020955018e-07, "loss": 0.8678, "step": 4564 }, { "epoch": 0.9360262456428132, "grad_norm": 1.2645306587219238, "learning_rate": 2.1394491132548768e-07, "loss": 0.8314, "step": 4565 }, { "epoch": 0.9362312897272914, "grad_norm": 1.1537350416183472, "learning_rate": 2.1258053947179945e-07, "loss": 0.7837, "step": 4566 }, { "epoch": 0.9364363338117695, "grad_norm": 1.157039761543274, "learning_rate": 2.1122048525036409e-07, "loss": 0.926, "step": 4567 }, { "epoch": 0.9366413778962477, "grad_norm": 1.1378505229949951, "learning_rate": 2.098647492611583e-07, "loss": 0.7318, "step": 4568 }, { "epoch": 0.9368464219807259, "grad_norm": 1.2823847532272339, "learning_rate": 2.0851333210225032e-07, "loss": 0.882, "step": 4569 }, { "epoch": 0.9370514660652041, "grad_norm": 1.3482725620269775, "learning_rate": 2.0716623436980777e-07, "loss": 0.8918, "step": 4570 }, { "epoch": 0.9372565101496821, "grad_norm": 1.182999610900879, "learning_rate": 2.0582345665809078e-07, "loss": 0.8526, "step": 4571 }, { "epoch": 0.9374615542341603, "grad_norm": 1.3528646230697632, "learning_rate": 2.0448499955945223e-07, "loss": 0.9165, "step": 4572 }, { "epoch": 0.9376665983186385, "grad_norm": 1.4160534143447876, "learning_rate": 2.03150863664342e-07, "loss": 0.9554, "step": 4573 }, { "epoch": 0.9378716424031167, "grad_norm": 1.2573957443237305, "learning_rate": 2.0182104956130267e-07, "loss": 0.9075, "step": 4574 }, { "epoch": 0.9380766864875948, "grad_norm": 1.2430073022842407, "learning_rate": 2.0049555783696716e-07, "loss": 0.8776, "step": 4575 }, { "epoch": 0.938281730572073, "grad_norm": 1.1950870752334595, "learning_rate": 1.9917438907606556e-07, "loss": 0.852, "step": 4576 }, { "epoch": 0.9384867746565512, "grad_norm": 1.3891929388046265, "learning_rate": 1.9785754386142164e-07, "loss": 0.918, "step": 4577 }, { "epoch": 0.9386918187410294, "grad_norm": 1.319639801979065, "learning_rate": 1.9654502277394628e-07, "loss": 0.8879, "step": 4578 }, { "epoch": 0.9388968628255074, "grad_norm": 1.2817227840423584, "learning_rate": 1.9523682639264852e-07, "loss": 0.8798, "step": 4579 }, { "epoch": 0.9391019069099856, "grad_norm": 1.3537907600402832, "learning_rate": 1.9393295529462674e-07, "loss": 0.8777, "step": 4580 }, { "epoch": 0.9393069509944638, "grad_norm": 1.2124191522598267, "learning_rate": 1.9263341005507308e-07, "loss": 0.828, "step": 4581 }, { "epoch": 0.939511995078942, "grad_norm": 1.2158926725387573, "learning_rate": 1.9133819124727003e-07, "loss": 0.8621, "step": 4582 }, { "epoch": 0.9397170391634201, "grad_norm": 1.2503573894500732, "learning_rate": 1.900472994425906e-07, "loss": 0.855, "step": 4583 }, { "epoch": 0.9399220832478983, "grad_norm": 1.2322396039962769, "learning_rate": 1.887607352104992e-07, "loss": 0.8594, "step": 4584 }, { "epoch": 0.9401271273323765, "grad_norm": 1.2013776302337646, "learning_rate": 1.874784991185552e-07, "loss": 0.8654, "step": 4585 }, { "epoch": 0.9403321714168547, "grad_norm": 1.3075995445251465, "learning_rate": 1.862005917324039e-07, "loss": 0.9374, "step": 4586 }, { "epoch": 0.9405372155013327, "grad_norm": 1.393054723739624, "learning_rate": 1.8492701361578326e-07, "loss": 0.9763, "step": 4587 }, { "epoch": 0.9407422595858109, "grad_norm": 1.3066915273666382, "learning_rate": 1.8365776533051939e-07, "loss": 0.9828, "step": 4588 }, { "epoch": 0.9409473036702891, "grad_norm": 1.2261958122253418, "learning_rate": 1.8239284743652996e-07, "loss": 0.9327, "step": 4589 }, { "epoch": 0.9411523477547673, "grad_norm": 1.1840550899505615, "learning_rate": 1.811322604918242e-07, "loss": 0.883, "step": 4590 }, { "epoch": 0.9413573918392454, "grad_norm": 1.2624096870422363, "learning_rate": 1.7987600505249726e-07, "loss": 0.8466, "step": 4591 }, { "epoch": 0.9415624359237236, "grad_norm": 1.2975229024887085, "learning_rate": 1.7862408167273472e-07, "loss": 0.8227, "step": 4592 }, { "epoch": 0.9417674800082018, "grad_norm": 1.2292503118515015, "learning_rate": 1.7737649090481146e-07, "loss": 0.863, "step": 4593 }, { "epoch": 0.94197252409268, "grad_norm": 1.1661250591278076, "learning_rate": 1.761332332990917e-07, "loss": 0.7495, "step": 4594 }, { "epoch": 0.9421775681771581, "grad_norm": 1.2304184436798096, "learning_rate": 1.7489430940402674e-07, "loss": 0.9218, "step": 4595 }, { "epoch": 0.9423826122616362, "grad_norm": 1.1741549968719482, "learning_rate": 1.7365971976615715e-07, "loss": 0.8809, "step": 4596 }, { "epoch": 0.9425876563461144, "grad_norm": 1.3800289630889893, "learning_rate": 1.724294649301095e-07, "loss": 0.8742, "step": 4597 }, { "epoch": 0.9427927004305926, "grad_norm": 1.22848641872406, "learning_rate": 1.7120354543860297e-07, "loss": 0.8462, "step": 4598 }, { "epoch": 0.9429977445150708, "grad_norm": 1.2304333448410034, "learning_rate": 1.699819618324372e-07, "loss": 0.8983, "step": 4599 }, { "epoch": 0.9432027885995489, "grad_norm": 1.141103744506836, "learning_rate": 1.687647146505067e-07, "loss": 0.8666, "step": 4600 }, { "epoch": 0.9434078326840271, "grad_norm": 1.1996232271194458, "learning_rate": 1.6755180442978414e-07, "loss": 0.8613, "step": 4601 }, { "epoch": 0.9436128767685052, "grad_norm": 1.2061500549316406, "learning_rate": 1.6634323170533928e-07, "loss": 0.8736, "step": 4602 }, { "epoch": 0.9438179208529834, "grad_norm": 1.294037938117981, "learning_rate": 1.6513899701032011e-07, "loss": 0.8897, "step": 4603 }, { "epoch": 0.9440229649374615, "grad_norm": 1.3555123805999756, "learning_rate": 1.6393910087596388e-07, "loss": 0.9361, "step": 4604 }, { "epoch": 0.9442280090219397, "grad_norm": 1.3328337669372559, "learning_rate": 1.6274354383159495e-07, "loss": 0.8995, "step": 4605 }, { "epoch": 0.9444330531064179, "grad_norm": 1.2028437852859497, "learning_rate": 1.615523264046226e-07, "loss": 0.9111, "step": 4606 }, { "epoch": 0.9446380971908961, "grad_norm": 1.306161642074585, "learning_rate": 1.6036544912054087e-07, "loss": 0.9021, "step": 4607 }, { "epoch": 0.9448431412753742, "grad_norm": 1.3564380407333374, "learning_rate": 1.5918291250292984e-07, "loss": 0.8362, "step": 4608 }, { "epoch": 0.9450481853598524, "grad_norm": 1.267255425453186, "learning_rate": 1.5800471707345776e-07, "loss": 0.8206, "step": 4609 }, { "epoch": 0.9452532294443305, "grad_norm": 1.1804654598236084, "learning_rate": 1.5683086335187113e-07, "loss": 0.8422, "step": 4610 }, { "epoch": 0.9454582735288087, "grad_norm": 1.2244185209274292, "learning_rate": 1.5566135185600796e-07, "loss": 0.82, "step": 4611 }, { "epoch": 0.9456633176132868, "grad_norm": 1.2357732057571411, "learning_rate": 1.544961831017855e-07, "loss": 0.925, "step": 4612 }, { "epoch": 0.945868361697765, "grad_norm": 1.487045407295227, "learning_rate": 1.5333535760320929e-07, "loss": 0.9101, "step": 4613 }, { "epoch": 0.9460734057822432, "grad_norm": 1.295696496963501, "learning_rate": 1.5217887587236524e-07, "loss": 0.9032, "step": 4614 }, { "epoch": 0.9462784498667214, "grad_norm": 1.3816044330596924, "learning_rate": 1.510267384194275e-07, "loss": 0.937, "step": 4615 }, { "epoch": 0.9464834939511995, "grad_norm": 1.306644082069397, "learning_rate": 1.4987894575264727e-07, "loss": 0.8289, "step": 4616 }, { "epoch": 0.9466885380356777, "grad_norm": 1.3251302242279053, "learning_rate": 1.487354983783673e-07, "loss": 0.8851, "step": 4617 }, { "epoch": 0.9468935821201558, "grad_norm": 1.3666726350784302, "learning_rate": 1.4759639680100634e-07, "loss": 0.8051, "step": 4618 }, { "epoch": 0.947098626204634, "grad_norm": 1.2362792491912842, "learning_rate": 1.464616415230702e-07, "loss": 0.8, "step": 4619 }, { "epoch": 0.9473036702891121, "grad_norm": 1.2328169345855713, "learning_rate": 1.4533123304514507e-07, "loss": 0.8827, "step": 4620 }, { "epoch": 0.9475087143735903, "grad_norm": 1.2141413688659668, "learning_rate": 1.442051718658999e-07, "loss": 0.8717, "step": 4621 }, { "epoch": 0.9477137584580685, "grad_norm": 1.284887671470642, "learning_rate": 1.430834584820895e-07, "loss": 0.9431, "step": 4622 }, { "epoch": 0.9479188025425467, "grad_norm": 1.3940824270248413, "learning_rate": 1.419660933885436e-07, "loss": 0.8978, "step": 4623 }, { "epoch": 0.9481238466270248, "grad_norm": 1.2104573249816895, "learning_rate": 1.408530770781813e-07, "loss": 0.8185, "step": 4624 }, { "epoch": 0.948328890711503, "grad_norm": 1.1741936206817627, "learning_rate": 1.3974441004199646e-07, "loss": 0.872, "step": 4625 }, { "epoch": 0.9485339347959811, "grad_norm": 1.171208381652832, "learning_rate": 1.386400927690712e-07, "loss": 0.827, "step": 4626 }, { "epoch": 0.9487389788804593, "grad_norm": 1.2492865324020386, "learning_rate": 1.375401257465625e-07, "loss": 0.8235, "step": 4627 }, { "epoch": 0.9489440229649375, "grad_norm": 1.2841492891311646, "learning_rate": 1.3644450945971222e-07, "loss": 0.8552, "step": 4628 }, { "epoch": 0.9491490670494156, "grad_norm": 1.2250983715057373, "learning_rate": 1.353532443918393e-07, "loss": 0.8804, "step": 4629 }, { "epoch": 0.9493541111338938, "grad_norm": 1.176269292831421, "learning_rate": 1.342663310243497e-07, "loss": 0.9355, "step": 4630 }, { "epoch": 0.949559155218372, "grad_norm": 1.301929235458374, "learning_rate": 1.3318376983672222e-07, "loss": 0.8962, "step": 4631 }, { "epoch": 0.9497641993028502, "grad_norm": 1.3157141208648682, "learning_rate": 1.3210556130652031e-07, "loss": 0.8614, "step": 4632 }, { "epoch": 0.9499692433873282, "grad_norm": 1.313792109489441, "learning_rate": 1.3103170590938574e-07, "loss": 0.8152, "step": 4633 }, { "epoch": 0.9501742874718064, "grad_norm": 1.2660613059997559, "learning_rate": 1.2996220411903958e-07, "loss": 0.8685, "step": 4634 }, { "epoch": 0.9503793315562846, "grad_norm": 1.268424391746521, "learning_rate": 1.2889705640728445e-07, "loss": 0.8729, "step": 4635 }, { "epoch": 0.9505843756407628, "grad_norm": 1.3738696575164795, "learning_rate": 1.278362632439989e-07, "loss": 0.8722, "step": 4636 }, { "epoch": 0.9507894197252409, "grad_norm": 1.26639986038208, "learning_rate": 1.2677982509714415e-07, "loss": 0.8698, "step": 4637 }, { "epoch": 0.9509944638097191, "grad_norm": 1.1846460103988647, "learning_rate": 1.2572774243275632e-07, "loss": 0.8434, "step": 4638 }, { "epoch": 0.9511995078941973, "grad_norm": 1.1380106210708618, "learning_rate": 1.2468001571495415e-07, "loss": 0.725, "step": 4639 }, { "epoch": 0.9514045519786755, "grad_norm": 1.381392478942871, "learning_rate": 1.2363664540593013e-07, "loss": 0.8327, "step": 4640 }, { "epoch": 0.9516095960631535, "grad_norm": 1.2864375114440918, "learning_rate": 1.2259763196596276e-07, "loss": 0.8671, "step": 4641 }, { "epoch": 0.9518146401476317, "grad_norm": 1.207381010055542, "learning_rate": 1.2156297585339872e-07, "loss": 0.8985, "step": 4642 }, { "epoch": 0.9520196842321099, "grad_norm": 1.255513310432434, "learning_rate": 1.2053267752466846e-07, "loss": 0.8911, "step": 4643 }, { "epoch": 0.9522247283165881, "grad_norm": 1.2368521690368652, "learning_rate": 1.1950673743427955e-07, "loss": 0.8487, "step": 4644 }, { "epoch": 0.9524297724010662, "grad_norm": 1.273232102394104, "learning_rate": 1.1848515603481769e-07, "loss": 0.8376, "step": 4645 }, { "epoch": 0.9526348164855444, "grad_norm": 1.2188396453857422, "learning_rate": 1.1746793377694022e-07, "loss": 0.8946, "step": 4646 }, { "epoch": 0.9528398605700226, "grad_norm": 1.1340837478637695, "learning_rate": 1.1645507110938925e-07, "loss": 0.8559, "step": 4647 }, { "epoch": 0.9530449046545008, "grad_norm": 1.3418922424316406, "learning_rate": 1.154465684789785e-07, "loss": 0.868, "step": 4648 }, { "epoch": 0.9532499487389788, "grad_norm": 1.257137417793274, "learning_rate": 1.1444242633060099e-07, "loss": 0.8646, "step": 4649 }, { "epoch": 0.953454992823457, "grad_norm": 1.2781740427017212, "learning_rate": 1.1344264510722347e-07, "loss": 0.9388, "step": 4650 }, { "epoch": 0.9536600369079352, "grad_norm": 1.2306692600250244, "learning_rate": 1.1244722524989204e-07, "loss": 0.8628, "step": 4651 }, { "epoch": 0.9538650809924134, "grad_norm": 1.2377136945724487, "learning_rate": 1.1145616719772545e-07, "loss": 0.8872, "step": 4652 }, { "epoch": 0.9540701250768915, "grad_norm": 1.254402995109558, "learning_rate": 1.1046947138792175e-07, "loss": 0.9093, "step": 4653 }, { "epoch": 0.9542751691613697, "grad_norm": 1.151558756828308, "learning_rate": 1.0948713825575164e-07, "loss": 0.8024, "step": 4654 }, { "epoch": 0.9544802132458479, "grad_norm": 1.1896770000457764, "learning_rate": 1.0850916823456404e-07, "loss": 0.871, "step": 4655 }, { "epoch": 0.954685257330326, "grad_norm": 1.180351734161377, "learning_rate": 1.075355617557805e-07, "loss": 0.8306, "step": 4656 }, { "epoch": 0.9548903014148041, "grad_norm": 1.2476818561553955, "learning_rate": 1.0656631924889749e-07, "loss": 0.8337, "step": 4657 }, { "epoch": 0.9550953454992823, "grad_norm": 1.2767499685287476, "learning_rate": 1.0560144114149073e-07, "loss": 0.841, "step": 4658 }, { "epoch": 0.9553003895837605, "grad_norm": 1.2962859869003296, "learning_rate": 1.0464092785920643e-07, "loss": 0.8695, "step": 4659 }, { "epoch": 0.9555054336682387, "grad_norm": 1.3529609441757202, "learning_rate": 1.0368477982576564e-07, "loss": 0.8096, "step": 4660 }, { "epoch": 0.9557104777527168, "grad_norm": 1.2239339351654053, "learning_rate": 1.0273299746296317e-07, "loss": 0.8302, "step": 4661 }, { "epoch": 0.955915521837195, "grad_norm": 1.1990638971328735, "learning_rate": 1.0178558119067316e-07, "loss": 0.8575, "step": 4662 }, { "epoch": 0.9561205659216732, "grad_norm": 1.2217538356781006, "learning_rate": 1.0084253142683686e-07, "loss": 0.9366, "step": 4663 }, { "epoch": 0.9563256100061513, "grad_norm": 1.2831681966781616, "learning_rate": 9.99038485874726e-08, "loss": 0.859, "step": 4664 }, { "epoch": 0.9565306540906295, "grad_norm": 1.3994057178497314, "learning_rate": 9.896953308667134e-08, "loss": 0.8822, "step": 4665 }, { "epoch": 0.9567356981751076, "grad_norm": 1.1630915403366089, "learning_rate": 9.803958533660008e-08, "loss": 0.8205, "step": 4666 }, { "epoch": 0.9569407422595858, "grad_norm": 1.298198938369751, "learning_rate": 9.711400574749507e-08, "loss": 0.8422, "step": 4667 }, { "epoch": 0.957145786344064, "grad_norm": 1.1372495889663696, "learning_rate": 9.619279472766863e-08, "loss": 0.8684, "step": 4668 }, { "epoch": 0.9573508304285422, "grad_norm": 1.2869994640350342, "learning_rate": 9.527595268350454e-08, "loss": 0.9186, "step": 4669 }, { "epoch": 0.9575558745130203, "grad_norm": 1.2007502317428589, "learning_rate": 9.436348001945705e-08, "loss": 0.8962, "step": 4670 }, { "epoch": 0.9577609185974985, "grad_norm": 1.2400538921356201, "learning_rate": 9.345537713805863e-08, "loss": 0.8814, "step": 4671 }, { "epoch": 0.9579659626819766, "grad_norm": 1.2920795679092407, "learning_rate": 9.255164443990994e-08, "loss": 0.8492, "step": 4672 }, { "epoch": 0.9581710067664548, "grad_norm": 1.2320311069488525, "learning_rate": 9.165228232368317e-08, "loss": 0.8928, "step": 4673 }, { "epoch": 0.9583760508509329, "grad_norm": 1.2581301927566528, "learning_rate": 9.075729118612431e-08, "loss": 0.875, "step": 4674 }, { "epoch": 0.9585810949354111, "grad_norm": 1.2433971166610718, "learning_rate": 8.986667142205197e-08, "loss": 0.8234, "step": 4675 }, { "epoch": 0.9587861390198893, "grad_norm": 1.276939868927002, "learning_rate": 8.898042342435298e-08, "loss": 0.7511, "step": 4676 }, { "epoch": 0.9589911831043675, "grad_norm": 1.2121111154556274, "learning_rate": 8.809854758399017e-08, "loss": 0.8888, "step": 4677 }, { "epoch": 0.9591962271888456, "grad_norm": 1.1732670068740845, "learning_rate": 8.722104428999234e-08, "loss": 0.8932, "step": 4678 }, { "epoch": 0.9594012712733238, "grad_norm": 1.2459310293197632, "learning_rate": 8.634791392946429e-08, "loss": 0.9081, "step": 4679 }, { "epoch": 0.9596063153578019, "grad_norm": 1.4078184366226196, "learning_rate": 8.547915688757902e-08, "loss": 0.9741, "step": 4680 }, { "epoch": 0.9598113594422801, "grad_norm": 1.2201987504959106, "learning_rate": 8.461477354758219e-08, "loss": 0.88, "step": 4681 }, { "epoch": 0.9600164035267582, "grad_norm": 1.3061840534210205, "learning_rate": 8.375476429078543e-08, "loss": 0.8429, "step": 4682 }, { "epoch": 0.9602214476112364, "grad_norm": 1.1358985900878906, "learning_rate": 8.289912949657752e-08, "loss": 0.8788, "step": 4683 }, { "epoch": 0.9604264916957146, "grad_norm": 1.236371636390686, "learning_rate": 8.204786954241206e-08, "loss": 0.8625, "step": 4684 }, { "epoch": 0.9606315357801928, "grad_norm": 1.2334392070770264, "learning_rate": 8.120098480381422e-08, "loss": 0.8728, "step": 4685 }, { "epoch": 0.9608365798646709, "grad_norm": 1.3510209321975708, "learning_rate": 8.035847565438181e-08, "loss": 0.8936, "step": 4686 }, { "epoch": 0.961041623949149, "grad_norm": 1.234608769416809, "learning_rate": 7.952034246577977e-08, "loss": 0.8944, "step": 4687 }, { "epoch": 0.9612466680336272, "grad_norm": 1.3241586685180664, "learning_rate": 7.868658560774234e-08, "loss": 0.777, "step": 4688 }, { "epoch": 0.9614517121181054, "grad_norm": 1.278234601020813, "learning_rate": 7.785720544807195e-08, "loss": 0.8953, "step": 4689 }, { "epoch": 0.9616567562025835, "grad_norm": 1.2731072902679443, "learning_rate": 7.703220235264708e-08, "loss": 0.8083, "step": 4690 }, { "epoch": 0.9618618002870617, "grad_norm": 1.3268489837646484, "learning_rate": 7.621157668540546e-08, "loss": 0.9377, "step": 4691 }, { "epoch": 0.9620668443715399, "grad_norm": 1.2547259330749512, "learning_rate": 7.539532880836087e-08, "loss": 0.9575, "step": 4692 }, { "epoch": 0.9622718884560181, "grad_norm": 1.2951291799545288, "learning_rate": 7.458345908159414e-08, "loss": 0.9268, "step": 4693 }, { "epoch": 0.9624769325404962, "grad_norm": 1.2778573036193848, "learning_rate": 7.377596786325437e-08, "loss": 0.876, "step": 4694 }, { "epoch": 0.9626819766249743, "grad_norm": 1.2693642377853394, "learning_rate": 7.297285550955769e-08, "loss": 0.87, "step": 4695 }, { "epoch": 0.9628870207094525, "grad_norm": 1.2490558624267578, "learning_rate": 7.21741223747907e-08, "loss": 0.766, "step": 4696 }, { "epoch": 0.9630920647939307, "grad_norm": 1.2246730327606201, "learning_rate": 7.137976881130826e-08, "loss": 0.8202, "step": 4697 }, { "epoch": 0.9632971088784088, "grad_norm": 1.2691813707351685, "learning_rate": 7.058979516953001e-08, "loss": 0.8532, "step": 4698 }, { "epoch": 0.963502152962887, "grad_norm": 1.3759220838546753, "learning_rate": 6.980420179794834e-08, "loss": 0.9135, "step": 4699 }, { "epoch": 0.9637071970473652, "grad_norm": 1.2440475225448608, "learning_rate": 6.902298904312043e-08, "loss": 0.8808, "step": 4700 }, { "epoch": 0.9639122411318434, "grad_norm": 1.272748351097107, "learning_rate": 6.824615724966843e-08, "loss": 0.8801, "step": 4701 }, { "epoch": 0.9641172852163216, "grad_norm": 1.389762282371521, "learning_rate": 6.747370676028819e-08, "loss": 0.8537, "step": 4702 }, { "epoch": 0.9643223293007996, "grad_norm": 1.1510100364685059, "learning_rate": 6.670563791573715e-08, "loss": 0.862, "step": 4703 }, { "epoch": 0.9645273733852778, "grad_norm": 1.3011268377304077, "learning_rate": 6.594195105484203e-08, "loss": 0.7926, "step": 4704 }, { "epoch": 0.964732417469756, "grad_norm": 1.1346731185913086, "learning_rate": 6.51826465144978e-08, "loss": 0.9184, "step": 4705 }, { "epoch": 0.9649374615542342, "grad_norm": 1.217109203338623, "learning_rate": 6.442772462966429e-08, "loss": 0.8533, "step": 4706 }, { "epoch": 0.9651425056387123, "grad_norm": 1.3648847341537476, "learning_rate": 6.367718573336845e-08, "loss": 0.8904, "step": 4707 }, { "epoch": 0.9653475497231905, "grad_norm": 1.2262274026870728, "learning_rate": 6.293103015670321e-08, "loss": 0.854, "step": 4708 }, { "epoch": 0.9655525938076687, "grad_norm": 1.235361099243164, "learning_rate": 6.21892582288286e-08, "loss": 0.9035, "step": 4709 }, { "epoch": 0.9657576378921469, "grad_norm": 1.2684611082077026, "learning_rate": 6.145187027697063e-08, "loss": 0.8312, "step": 4710 }, { "epoch": 0.9659626819766249, "grad_norm": 1.2726603746414185, "learning_rate": 6.071886662642245e-08, "loss": 0.9536, "step": 4711 }, { "epoch": 0.9661677260611031, "grad_norm": 1.3106192350387573, "learning_rate": 5.999024760054095e-08, "loss": 0.899, "step": 4712 }, { "epoch": 0.9663727701455813, "grad_norm": 1.155292272567749, "learning_rate": 5.926601352075012e-08, "loss": 0.8489, "step": 4713 }, { "epoch": 0.9665778142300595, "grad_norm": 1.2800779342651367, "learning_rate": 5.854616470653884e-08, "loss": 0.9161, "step": 4714 }, { "epoch": 0.9667828583145376, "grad_norm": 1.2988187074661255, "learning_rate": 5.78307014754631e-08, "loss": 0.8758, "step": 4715 }, { "epoch": 0.9669879023990158, "grad_norm": 1.179113745689392, "learning_rate": 5.711962414314265e-08, "loss": 0.8458, "step": 4716 }, { "epoch": 0.967192946483494, "grad_norm": 1.2836605310440063, "learning_rate": 5.641293302326323e-08, "loss": 0.835, "step": 4717 }, { "epoch": 0.9673979905679722, "grad_norm": 1.373349905014038, "learning_rate": 5.571062842757324e-08, "loss": 0.9309, "step": 4718 }, { "epoch": 0.9676030346524502, "grad_norm": 1.1898304224014282, "learning_rate": 5.501271066589042e-08, "loss": 0.8893, "step": 4719 }, { "epoch": 0.9678080787369284, "grad_norm": 1.2558872699737549, "learning_rate": 5.431918004609405e-08, "loss": 0.8589, "step": 4720 }, { "epoch": 0.9680131228214066, "grad_norm": 1.2924448251724243, "learning_rate": 5.3630036874128263e-08, "loss": 0.8586, "step": 4721 }, { "epoch": 0.9682181669058848, "grad_norm": 1.2259674072265625, "learning_rate": 5.2945281454003236e-08, "loss": 0.8812, "step": 4722 }, { "epoch": 0.9684232109903629, "grad_norm": 1.283638834953308, "learning_rate": 5.2264914087792885e-08, "loss": 0.9315, "step": 4723 }, { "epoch": 0.9686282550748411, "grad_norm": 1.3402254581451416, "learning_rate": 5.1588935075634896e-08, "loss": 0.8822, "step": 4724 }, { "epoch": 0.9688332991593193, "grad_norm": 1.3294905424118042, "learning_rate": 5.091734471573073e-08, "loss": 0.8863, "step": 4725 }, { "epoch": 0.9690383432437975, "grad_norm": 1.1872228384017944, "learning_rate": 5.025014330434785e-08, "loss": 0.8662, "step": 4726 }, { "epoch": 0.9692433873282755, "grad_norm": 1.30623197555542, "learning_rate": 4.958733113581415e-08, "loss": 0.9162, "step": 4727 }, { "epoch": 0.9694484314127537, "grad_norm": 1.3535748720169067, "learning_rate": 4.892890850252463e-08, "loss": 0.9109, "step": 4728 }, { "epoch": 0.9696534754972319, "grad_norm": 1.3454312086105347, "learning_rate": 4.827487569493583e-08, "loss": 0.8902, "step": 4729 }, { "epoch": 0.9698585195817101, "grad_norm": 1.2697550058364868, "learning_rate": 4.762523300157029e-08, "loss": 0.9235, "step": 4730 }, { "epoch": 0.9700635636661882, "grad_norm": 1.2812116146087646, "learning_rate": 4.697998070900989e-08, "loss": 0.8121, "step": 4731 }, { "epoch": 0.9702686077506664, "grad_norm": 1.2643818855285645, "learning_rate": 4.6339119101902475e-08, "loss": 0.9487, "step": 4732 }, { "epoch": 0.9704736518351446, "grad_norm": 1.3134256601333618, "learning_rate": 4.5702648462959684e-08, "loss": 0.9007, "step": 4733 }, { "epoch": 0.9706786959196227, "grad_norm": 1.286794662475586, "learning_rate": 4.5070569072952485e-08, "loss": 0.8828, "step": 4734 }, { "epoch": 0.9708837400041009, "grad_norm": 1.4907666444778442, "learning_rate": 4.4442881210718935e-08, "loss": 0.9271, "step": 4735 }, { "epoch": 0.971088784088579, "grad_norm": 1.310200572013855, "learning_rate": 4.381958515315643e-08, "loss": 0.9078, "step": 4736 }, { "epoch": 0.9712938281730572, "grad_norm": 1.1453328132629395, "learning_rate": 4.320068117522835e-08, "loss": 0.8381, "step": 4737 }, { "epoch": 0.9714988722575354, "grad_norm": 1.2771694660186768, "learning_rate": 4.258616954995631e-08, "loss": 0.8429, "step": 4738 }, { "epoch": 0.9717039163420136, "grad_norm": 1.174309492111206, "learning_rate": 4.197605054842791e-08, "loss": 0.8495, "step": 4739 }, { "epoch": 0.9719089604264917, "grad_norm": 1.2465556859970093, "learning_rate": 4.1370324439792274e-08, "loss": 0.9447, "step": 4740 }, { "epoch": 0.9721140045109699, "grad_norm": 1.3831989765167236, "learning_rate": 4.0768991491260124e-08, "loss": 0.9341, "step": 4741 }, { "epoch": 0.972319048595448, "grad_norm": 1.23193359375, "learning_rate": 4.0172051968101474e-08, "loss": 0.882, "step": 4742 }, { "epoch": 0.9725240926799262, "grad_norm": 1.3260341882705688, "learning_rate": 3.957950613365347e-08, "loss": 0.935, "step": 4743 }, { "epoch": 0.9727291367644043, "grad_norm": 1.2473682165145874, "learning_rate": 3.8991354249311444e-08, "loss": 0.874, "step": 4744 }, { "epoch": 0.9729341808488825, "grad_norm": 1.2827497720718384, "learning_rate": 3.840759657453452e-08, "loss": 0.8122, "step": 4745 }, { "epoch": 0.9731392249333607, "grad_norm": 1.2780505418777466, "learning_rate": 3.782823336683894e-08, "loss": 0.8462, "step": 4746 }, { "epoch": 0.9733442690178389, "grad_norm": 1.1496644020080566, "learning_rate": 3.7253264881809137e-08, "loss": 0.8034, "step": 4747 }, { "epoch": 0.973549313102317, "grad_norm": 1.2375913858413696, "learning_rate": 3.668269137308666e-08, "loss": 0.8525, "step": 4748 }, { "epoch": 0.9737543571867951, "grad_norm": 1.181255578994751, "learning_rate": 3.611651309237241e-08, "loss": 0.8333, "step": 4749 }, { "epoch": 0.9739594012712733, "grad_norm": 1.2738126516342163, "learning_rate": 3.5554730289434344e-08, "loss": 0.8911, "step": 4750 }, { "epoch": 0.9741644453557515, "grad_norm": 1.3109666109085083, "learning_rate": 3.499734321209425e-08, "loss": 0.9064, "step": 4751 }, { "epoch": 0.9743694894402296, "grad_norm": 1.254602074623108, "learning_rate": 3.4444352106242086e-08, "loss": 0.8672, "step": 4752 }, { "epoch": 0.9745745335247078, "grad_norm": 1.2689441442489624, "learning_rate": 3.389575721582161e-08, "loss": 0.803, "step": 4753 }, { "epoch": 0.974779577609186, "grad_norm": 1.3104192018508911, "learning_rate": 3.335155878284257e-08, "loss": 0.9062, "step": 4754 }, { "epoch": 0.9749846216936642, "grad_norm": 1.324240803718567, "learning_rate": 3.281175704737294e-08, "loss": 0.9187, "step": 4755 }, { "epoch": 0.9751896657781423, "grad_norm": 1.2783982753753662, "learning_rate": 3.227635224754111e-08, "loss": 0.9327, "step": 4756 }, { "epoch": 0.9753947098626204, "grad_norm": 1.2920770645141602, "learning_rate": 3.174534461953593e-08, "loss": 0.8804, "step": 4757 }, { "epoch": 0.9755997539470986, "grad_norm": 1.3331310749053955, "learning_rate": 3.121873439760781e-08, "loss": 0.8694, "step": 4758 }, { "epoch": 0.9758047980315768, "grad_norm": 1.3594595193862915, "learning_rate": 3.069652181406424e-08, "loss": 0.8796, "step": 4759 }, { "epoch": 0.9760098421160549, "grad_norm": 1.2312462329864502, "learning_rate": 3.017870709927651e-08, "loss": 0.8738, "step": 4760 }, { "epoch": 0.9762148862005331, "grad_norm": 1.3440643548965454, "learning_rate": 2.966529048167188e-08, "loss": 0.9209, "step": 4761 }, { "epoch": 0.9764199302850113, "grad_norm": 1.3585147857666016, "learning_rate": 2.915627218774142e-08, "loss": 0.9156, "step": 4762 }, { "epoch": 0.9766249743694895, "grad_norm": 1.2525808811187744, "learning_rate": 2.8651652442033275e-08, "loss": 0.9067, "step": 4763 }, { "epoch": 0.9768300184539676, "grad_norm": 1.275389552116394, "learning_rate": 2.815143146715604e-08, "loss": 0.8511, "step": 4764 }, { "epoch": 0.9770350625384457, "grad_norm": 1.2695668935775757, "learning_rate": 2.7655609483776546e-08, "loss": 0.9077, "step": 4765 }, { "epoch": 0.9772401066229239, "grad_norm": 1.2902493476867676, "learning_rate": 2.7164186710623152e-08, "loss": 0.7946, "step": 4766 }, { "epoch": 0.9774451507074021, "grad_norm": 1.1875089406967163, "learning_rate": 2.667716336448356e-08, "loss": 0.7842, "step": 4767 }, { "epoch": 0.9776501947918802, "grad_norm": 1.283764362335205, "learning_rate": 2.6194539660202578e-08, "loss": 0.8692, "step": 4768 }, { "epoch": 0.9778552388763584, "grad_norm": 1.2340550422668457, "learning_rate": 2.5716315810684346e-08, "loss": 0.8406, "step": 4769 }, { "epoch": 0.9780602829608366, "grad_norm": 1.212339997291565, "learning_rate": 2.524249202689566e-08, "loss": 0.8745, "step": 4770 }, { "epoch": 0.9782653270453148, "grad_norm": 1.2085214853286743, "learning_rate": 2.477306851785821e-08, "loss": 0.9352, "step": 4771 }, { "epoch": 0.978470371129793, "grad_norm": 1.3221185207366943, "learning_rate": 2.430804549065302e-08, "loss": 0.8868, "step": 4772 }, { "epoch": 0.978675415214271, "grad_norm": 1.2002718448638916, "learning_rate": 2.3847423150421545e-08, "loss": 0.8669, "step": 4773 }, { "epoch": 0.9788804592987492, "grad_norm": 1.247147798538208, "learning_rate": 2.3391201700363464e-08, "loss": 0.8351, "step": 4774 }, { "epoch": 0.9790855033832274, "grad_norm": 1.3586010932922363, "learning_rate": 2.293938134173779e-08, "loss": 0.9316, "step": 4775 }, { "epoch": 0.9792905474677056, "grad_norm": 1.2607026100158691, "learning_rate": 2.2491962273859524e-08, "loss": 0.8912, "step": 4776 }, { "epoch": 0.9794955915521837, "grad_norm": 1.23208487033844, "learning_rate": 2.2048944694104123e-08, "loss": 0.8127, "step": 4777 }, { "epoch": 0.9797006356366619, "grad_norm": 1.102891445159912, "learning_rate": 2.1610328797904145e-08, "loss": 0.8074, "step": 4778 }, { "epoch": 0.9799056797211401, "grad_norm": 1.213950753211975, "learning_rate": 2.1176114778751478e-08, "loss": 0.8807, "step": 4779 }, { "epoch": 0.9801107238056183, "grad_norm": 1.2052127122879028, "learning_rate": 2.074630282819623e-08, "loss": 0.8809, "step": 4780 }, { "epoch": 0.9803157678900963, "grad_norm": 1.2717236280441284, "learning_rate": 2.032089313584562e-08, "loss": 0.8557, "step": 4781 }, { "epoch": 0.9805208119745745, "grad_norm": 1.1754846572875977, "learning_rate": 1.989988588936509e-08, "loss": 0.8525, "step": 4782 }, { "epoch": 0.9807258560590527, "grad_norm": 1.2098315954208374, "learning_rate": 1.9483281274479405e-08, "loss": 0.8024, "step": 4783 }, { "epoch": 0.9809309001435309, "grad_norm": 1.2325433492660522, "learning_rate": 1.9071079474968224e-08, "loss": 0.812, "step": 4784 }, { "epoch": 0.981135944228009, "grad_norm": 1.3555418252944946, "learning_rate": 1.8663280672671645e-08, "loss": 0.9474, "step": 4785 }, { "epoch": 0.9813409883124872, "grad_norm": 1.1520757675170898, "learning_rate": 1.825988504748688e-08, "loss": 0.821, "step": 4786 }, { "epoch": 0.9815460323969654, "grad_norm": 1.3058720827102661, "learning_rate": 1.7860892777367133e-08, "loss": 0.8693, "step": 4787 }, { "epoch": 0.9817510764814436, "grad_norm": 1.3222105503082275, "learning_rate": 1.7466304038324945e-08, "loss": 0.8347, "step": 4788 }, { "epoch": 0.9819561205659216, "grad_norm": 1.2526148557662964, "learning_rate": 1.7076119004429958e-08, "loss": 0.8629, "step": 4789 }, { "epoch": 0.9821611646503998, "grad_norm": 1.3313342332839966, "learning_rate": 1.6690337847807824e-08, "loss": 0.8613, "step": 4790 }, { "epoch": 0.982366208734878, "grad_norm": 1.1598621606826782, "learning_rate": 1.630896073864352e-08, "loss": 0.8125, "step": 4791 }, { "epoch": 0.9825712528193562, "grad_norm": 1.3019027709960938, "learning_rate": 1.5931987845176912e-08, "loss": 0.8981, "step": 4792 }, { "epoch": 0.9827762969038343, "grad_norm": 1.1974587440490723, "learning_rate": 1.5559419333707193e-08, "loss": 0.9243, "step": 4793 }, { "epoch": 0.9829813409883125, "grad_norm": 1.2638607025146484, "learning_rate": 1.5191255368589562e-08, "loss": 0.979, "step": 4794 }, { "epoch": 0.9831863850727907, "grad_norm": 1.2962082624435425, "learning_rate": 1.4827496112235218e-08, "loss": 0.9314, "step": 4795 }, { "epoch": 0.9833914291572688, "grad_norm": 1.4194027185440063, "learning_rate": 1.4468141725114682e-08, "loss": 0.8133, "step": 4796 }, { "epoch": 0.9835964732417469, "grad_norm": 1.3731626272201538, "learning_rate": 1.411319236575337e-08, "loss": 0.8928, "step": 4797 }, { "epoch": 0.9838015173262251, "grad_norm": 1.337334156036377, "learning_rate": 1.3762648190733808e-08, "loss": 0.9043, "step": 4798 }, { "epoch": 0.9840065614107033, "grad_norm": 1.1891751289367676, "learning_rate": 1.3416509354695628e-08, "loss": 0.8052, "step": 4799 }, { "epoch": 0.9842116054951815, "grad_norm": 1.1961523294448853, "learning_rate": 1.3074776010334466e-08, "loss": 0.8956, "step": 4800 }, { "epoch": 0.9844166495796596, "grad_norm": 1.2265945672988892, "learning_rate": 1.2737448308403067e-08, "loss": 0.8354, "step": 4801 }, { "epoch": 0.9846216936641378, "grad_norm": 1.285273551940918, "learning_rate": 1.2404526397711281e-08, "loss": 0.9038, "step": 4802 }, { "epoch": 0.984826737748616, "grad_norm": 1.1777963638305664, "learning_rate": 1.2076010425123853e-08, "loss": 0.8487, "step": 4803 }, { "epoch": 0.9850317818330941, "grad_norm": 1.1969106197357178, "learning_rate": 1.1751900535562633e-08, "loss": 0.9409, "step": 4804 }, { "epoch": 0.9852368259175722, "grad_norm": 1.3527971506118774, "learning_rate": 1.1432196872005474e-08, "loss": 0.8959, "step": 4805 }, { "epoch": 0.9854418700020504, "grad_norm": 1.3096072673797607, "learning_rate": 1.1116899575488449e-08, "loss": 0.9038, "step": 4806 }, { "epoch": 0.9856469140865286, "grad_norm": 1.2848867177963257, "learning_rate": 1.0806008785100297e-08, "loss": 0.86, "step": 4807 }, { "epoch": 0.9858519581710068, "grad_norm": 1.2487194538116455, "learning_rate": 1.0499524637987979e-08, "loss": 0.848, "step": 4808 }, { "epoch": 0.986057002255485, "grad_norm": 1.3076516389846802, "learning_rate": 1.0197447269355564e-08, "loss": 0.7823, "step": 4809 }, { "epoch": 0.9862620463399631, "grad_norm": 1.1849528551101685, "learning_rate": 9.899776812459794e-09, "loss": 0.8388, "step": 4810 }, { "epoch": 0.9864670904244413, "grad_norm": 1.366464614868164, "learning_rate": 9.606513398617846e-09, "loss": 0.8344, "step": 4811 }, { "epoch": 0.9866721345089194, "grad_norm": 1.1470067501068115, "learning_rate": 9.317657157197347e-09, "loss": 0.8606, "step": 4812 }, { "epoch": 0.9868771785933976, "grad_norm": 1.2826861143112183, "learning_rate": 9.033208215626366e-09, "loss": 0.8618, "step": 4813 }, { "epoch": 0.9870822226778757, "grad_norm": 1.2756985425949097, "learning_rate": 8.75316669938675e-09, "loss": 0.8702, "step": 4814 }, { "epoch": 0.9872872667623539, "grad_norm": 1.28975510597229, "learning_rate": 8.477532732016346e-09, "loss": 0.8322, "step": 4815 }, { "epoch": 0.9874923108468321, "grad_norm": 1.1406406164169312, "learning_rate": 8.206306435109002e-09, "loss": 0.8608, "step": 4816 }, { "epoch": 0.9876973549313103, "grad_norm": 1.3426806926727295, "learning_rate": 7.93948792831234e-09, "loss": 0.8241, "step": 4817 }, { "epoch": 0.9879023990157884, "grad_norm": 1.240348219871521, "learning_rate": 7.677077329332205e-09, "loss": 0.8914, "step": 4818 }, { "epoch": 0.9881074431002665, "grad_norm": 1.3089845180511475, "learning_rate": 7.419074753928224e-09, "loss": 0.9352, "step": 4819 }, { "epoch": 0.9883124871847447, "grad_norm": 1.104813814163208, "learning_rate": 7.165480315916018e-09, "loss": 0.8925, "step": 4820 }, { "epoch": 0.9885175312692229, "grad_norm": 1.1805771589279175, "learning_rate": 6.916294127167211e-09, "loss": 0.8924, "step": 4821 }, { "epoch": 0.988722575353701, "grad_norm": 1.1583722829818726, "learning_rate": 6.671516297606095e-09, "loss": 0.8489, "step": 4822 }, { "epoch": 0.9889276194381792, "grad_norm": 1.1947838068008423, "learning_rate": 6.431146935216293e-09, "loss": 0.9174, "step": 4823 }, { "epoch": 0.9891326635226574, "grad_norm": 1.3402700424194336, "learning_rate": 6.195186146034093e-09, "loss": 0.9647, "step": 4824 }, { "epoch": 0.9893377076071356, "grad_norm": 1.3301175832748413, "learning_rate": 5.963634034149568e-09, "loss": 0.9146, "step": 4825 }, { "epoch": 0.9895427516916137, "grad_norm": 1.2165236473083496, "learning_rate": 5.736490701713227e-09, "loss": 0.8251, "step": 4826 }, { "epoch": 0.9897477957760918, "grad_norm": 1.3145322799682617, "learning_rate": 5.513756248924917e-09, "loss": 0.8023, "step": 4827 }, { "epoch": 0.98995283986057, "grad_norm": 1.174120545387268, "learning_rate": 5.295430774041599e-09, "loss": 0.826, "step": 4828 }, { "epoch": 0.9901578839450482, "grad_norm": 1.2390190362930298, "learning_rate": 5.081514373377339e-09, "loss": 0.845, "step": 4829 }, { "epoch": 0.9903629280295263, "grad_norm": 1.296897530555725, "learning_rate": 4.8720071412988734e-09, "loss": 0.7985, "step": 4830 }, { "epoch": 0.9905679721140045, "grad_norm": 1.2202821969985962, "learning_rate": 4.666909170227829e-09, "loss": 0.8488, "step": 4831 }, { "epoch": 0.9907730161984827, "grad_norm": 1.236810326576233, "learning_rate": 4.466220550641831e-09, "loss": 0.8651, "step": 4832 }, { "epoch": 0.9909780602829609, "grad_norm": 1.292959213256836, "learning_rate": 4.269941371073394e-09, "loss": 0.8801, "step": 4833 }, { "epoch": 0.991183104367439, "grad_norm": 1.2247753143310547, "learning_rate": 4.0780717181077015e-09, "loss": 0.872, "step": 4834 }, { "epoch": 0.9913881484519171, "grad_norm": 1.368373990058899, "learning_rate": 3.890611676388156e-09, "loss": 0.8753, "step": 4835 }, { "epoch": 0.9915931925363953, "grad_norm": 1.229924201965332, "learning_rate": 3.707561328608611e-09, "loss": 0.903, "step": 4836 }, { "epoch": 0.9917982366208735, "grad_norm": 1.2382076978683472, "learning_rate": 3.528920755523357e-09, "loss": 0.8718, "step": 4837 }, { "epoch": 0.9920032807053516, "grad_norm": 1.210396647453308, "learning_rate": 3.3546900359360256e-09, "loss": 0.8153, "step": 4838 }, { "epoch": 0.9922083247898298, "grad_norm": 1.3019506931304932, "learning_rate": 3.184869246707356e-09, "loss": 0.8917, "step": 4839 }, { "epoch": 0.992413368874308, "grad_norm": 1.2761454582214355, "learning_rate": 3.0194584627507573e-09, "loss": 0.9318, "step": 4840 }, { "epoch": 0.9926184129587862, "grad_norm": 1.4018549919128418, "learning_rate": 2.858457757038968e-09, "loss": 0.9505, "step": 4841 }, { "epoch": 0.9928234570432644, "grad_norm": 1.27989661693573, "learning_rate": 2.701867200592956e-09, "loss": 0.9211, "step": 4842 }, { "epoch": 0.9930285011277424, "grad_norm": 1.277847170829773, "learning_rate": 2.549686862494127e-09, "loss": 0.8202, "step": 4843 }, { "epoch": 0.9932335452122206, "grad_norm": 1.2692298889160156, "learning_rate": 2.401916809872118e-09, "loss": 0.861, "step": 4844 }, { "epoch": 0.9934385892966988, "grad_norm": 1.188412070274353, "learning_rate": 2.258557107917003e-09, "loss": 0.9361, "step": 4845 }, { "epoch": 0.993643633381177, "grad_norm": 1.25069260597229, "learning_rate": 2.1196078198693072e-09, "loss": 0.8815, "step": 4846 }, { "epoch": 0.9938486774656551, "grad_norm": 1.3354235887527466, "learning_rate": 1.9850690070266633e-09, "loss": 0.7963, "step": 4847 }, { "epoch": 0.9940537215501333, "grad_norm": 1.2576769590377808, "learning_rate": 1.8549407287371534e-09, "loss": 0.9035, "step": 4848 }, { "epoch": 0.9942587656346115, "grad_norm": 1.2411997318267822, "learning_rate": 1.7292230424081902e-09, "loss": 0.8818, "step": 4849 }, { "epoch": 0.9944638097190897, "grad_norm": 1.2368662357330322, "learning_rate": 1.6079160034976337e-09, "loss": 0.9587, "step": 4850 }, { "epoch": 0.9946688538035677, "grad_norm": 1.3012458086013794, "learning_rate": 1.4910196655193443e-09, "loss": 0.8319, "step": 4851 }, { "epoch": 0.9948738978880459, "grad_norm": 1.2074081897735596, "learning_rate": 1.378534080042071e-09, "loss": 0.8642, "step": 4852 }, { "epoch": 0.9950789419725241, "grad_norm": 1.2575501203536987, "learning_rate": 1.2704592966861217e-09, "loss": 0.8686, "step": 4853 }, { "epoch": 0.9952839860570023, "grad_norm": 1.2556512355804443, "learning_rate": 1.166795363127804e-09, "loss": 0.838, "step": 4854 }, { "epoch": 0.9954890301414804, "grad_norm": 1.2726314067840576, "learning_rate": 1.0675423250994244e-09, "loss": 0.8443, "step": 4855 }, { "epoch": 0.9956940742259586, "grad_norm": 1.2407540082931519, "learning_rate": 9.727002263837382e-10, "loss": 0.9097, "step": 4856 }, { "epoch": 0.9958991183104368, "grad_norm": 1.3231528997421265, "learning_rate": 8.822691088195001e-10, "loss": 0.9126, "step": 4857 }, { "epoch": 0.996104162394915, "grad_norm": 1.2550368309020996, "learning_rate": 7.962490123014644e-10, "loss": 0.8509, "step": 4858 }, { "epoch": 0.996309206479393, "grad_norm": 1.276752233505249, "learning_rate": 7.146399747748334e-10, "loss": 0.7455, "step": 4859 }, { "epoch": 0.9965142505638712, "grad_norm": 1.2541906833648682, "learning_rate": 6.374420322408093e-10, "loss": 0.7785, "step": 4860 }, { "epoch": 0.9967192946483494, "grad_norm": 1.1952391862869263, "learning_rate": 5.646552187554832e-10, "loss": 0.8348, "step": 4861 }, { "epoch": 0.9969243387328276, "grad_norm": 1.2634912729263306, "learning_rate": 4.962795664265052e-10, "loss": 0.9375, "step": 4862 }, { "epoch": 0.9971293828173057, "grad_norm": 1.339295506477356, "learning_rate": 4.3231510541974497e-10, "loss": 0.8685, "step": 4863 }, { "epoch": 0.9973344269017839, "grad_norm": 1.1955757141113281, "learning_rate": 3.727618639504105e-10, "loss": 0.8847, "step": 4864 }, { "epoch": 0.9975394709862621, "grad_norm": 1.2981799840927124, "learning_rate": 3.1761986829081937e-10, "loss": 0.9205, "step": 4865 }, { "epoch": 0.9977445150707402, "grad_norm": 1.120650053024292, "learning_rate": 2.668891427659581e-10, "loss": 0.8808, "step": 4866 }, { "epoch": 0.9979495591552183, "grad_norm": 1.377510666847229, "learning_rate": 2.2056970975459223e-10, "loss": 0.9329, "step": 4867 }, { "epoch": 0.9981546032396965, "grad_norm": 1.335145115852356, "learning_rate": 1.7866158969148674e-10, "loss": 0.9, "step": 4868 }, { "epoch": 0.9983596473241747, "grad_norm": 1.2458454370498657, "learning_rate": 1.4116480106407538e-10, "loss": 0.8948, "step": 4869 }, { "epoch": 0.9985646914086529, "grad_norm": 1.257785677909851, "learning_rate": 1.0807936041246081e-10, "loss": 0.8895, "step": 4870 }, { "epoch": 0.998769735493131, "grad_norm": 1.4039311408996582, "learning_rate": 7.940528233274514e-11, "loss": 0.9468, "step": 4871 }, { "epoch": 0.9989747795776092, "grad_norm": 1.302139401435852, "learning_rate": 5.514257947369928e-11, "loss": 0.8447, "step": 4872 }, { "epoch": 0.9991798236620874, "grad_norm": 1.2166184186935425, "learning_rate": 3.529126254009363e-11, "loss": 0.7688, "step": 4873 }, { "epoch": 0.9993848677465655, "grad_norm": 1.1765559911727905, "learning_rate": 1.985134028714697e-11, "loss": 0.8535, "step": 4874 }, { "epoch": 0.9995899118310436, "grad_norm": 1.3132585287094116, "learning_rate": 8.822819527187776e-12, "loss": 0.8153, "step": 4875 }, { "epoch": 0.9997949559155218, "grad_norm": 1.3095860481262207, "learning_rate": 2.2057051252133422e-12, "loss": 0.8657, "step": 4876 }, { "epoch": 1.0, "grad_norm": 1.1805962324142456, "learning_rate": 0.0, "loss": 0.8559, "step": 4877 }, { "epoch": 1.0, "step": 4877, "total_flos": 5.937333329950723e+19, "train_loss": 0.9457440080275075, "train_runtime": 20454.4048, "train_samples_per_second": 30.519, "train_steps_per_second": 0.238 } ], "logging_steps": 1.0, "max_steps": 4877, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.937333329950723e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }